1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
80 const RISCVSubtarget &STI)
81 : TargetLowering(TM), Subtarget(STI) {
82
83 RISCVABI::ABI ABI = Subtarget.getTargetABI();
84 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
85
86 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
87 !Subtarget.hasStdExtF()) {
88 errs() << "Hard-float 'f' ABI can't be used for a target that "
89 "doesn't support the F instruction set extension (ignoring "
90 "target-abi)\n";
92 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
93 !Subtarget.hasStdExtD()) {
94 errs() << "Hard-float 'd' ABI can't be used for a target that "
95 "doesn't support the D instruction set extension (ignoring "
96 "target-abi)\n";
98 }
99
100 switch (ABI) {
101 default:
102 report_fatal_error("Don't know how to lower this ABI");
111 break;
112 }
113
114 MVT XLenVT = Subtarget.getXLenVT();
115
116 // Set up the register classes.
117 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
118
119 if (Subtarget.hasStdExtZfhmin())
120 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
121 if (Subtarget.hasStdExtZfbfmin())
122 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtF())
124 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
125 if (Subtarget.hasStdExtD())
126 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
127 if (Subtarget.hasStdExtZhinxmin())
128 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
129 if (Subtarget.hasStdExtZfinx())
130 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
131 if (Subtarget.hasStdExtZdinx()) {
132 if (Subtarget.is64Bit())
133 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
134 else
135 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
136 }
137
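  // All the scalable vector types potentially supported by RVV, grouped by
  // element type. These tables drive the register-class setup below.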
138 static const MVT::SimpleValueType BoolVecVTs[] = {
139 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
140 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
141 static const MVT::SimpleValueType IntVecVTs[] = {
142 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
143 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
144 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
145 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
146 MVT::nxv4i64, MVT::nxv8i64};
147 static const MVT::SimpleValueType F16VecVTs[] = {
148 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
149 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
150 static const MVT::SimpleValueType BF16VecVTs[] = {
151 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
152 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
153 static const MVT::SimpleValueType F32VecVTs[] = {
154 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
155 static const MVT::SimpleValueType F64VecVTs[] = {
156 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
157
158 if (Subtarget.hasVInstructions()) {
159 auto addRegClassForRVV = [this](MVT VT) {
160 // Disable the smallest fractional LMUL types if ELEN is less than
161 // RVVBitsPerBlock.
162 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
163 if (VT.getVectorMinNumElements() < MinElts)
164 return;
165
166 unsigned Size = VT.getSizeInBits().getKnownMinValue();
167 const TargetRegisterClass *RC;
169 RC = &RISCV::VRRegClass;
170 else if (Size == 2 * RISCV::RVVBitsPerBlock)
171 RC = &RISCV::VRM2RegClass;
172 else if (Size == 4 * RISCV::RVVBitsPerBlock)
173 RC = &RISCV::VRM4RegClass;
174 else if (Size == 8 * RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRM8RegClass;
176 else
177 llvm_unreachable("Unexpected size");
178
179 addRegisterClass(VT, RC);
180 };
181
182 for (MVT VT : BoolVecVTs)
183 addRegClassForRVV(VT);
184 for (MVT VT : IntVecVTs) {
185 if (VT.getVectorElementType() == MVT::i64 &&
186 !Subtarget.hasVInstructionsI64())
187 continue;
188 addRegClassForRVV(VT);
189 }
190
191 if (Subtarget.hasVInstructionsF16Minimal())
192 for (MVT VT : F16VecVTs)
193 addRegClassForRVV(VT);
194
195 if (Subtarget.hasVInstructionsBF16Minimal())
196 for (MVT VT : BF16VecVTs)
197 addRegClassForRVV(VT);
198
199 if (Subtarget.hasVInstructionsF32())
200 for (MVT VT : F32VecVTs)
201 addRegClassForRVV(VT);
202
203 if (Subtarget.hasVInstructionsF64())
204 for (MVT VT : F64VecVTs)
205 addRegClassForRVV(VT);
206
207 if (Subtarget.useRVVForFixedLengthVectors()) {
208 auto addRegClassForFixedVectors = [this](MVT VT) {
209 MVT ContainerVT = getContainerForFixedLengthVector(VT);
210 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
211 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
212 addRegisterClass(VT, TRI.getRegClass(RCID));
213 };
215 if (useRVVForFixedLengthVectorVT(VT))
216 addRegClassForFixedVectors(VT);
217
219 if (useRVVForFixedLengthVectorVT(VT))
220 addRegClassForFixedVectors(VT);
221 }
222 }
223
224 // Compute derived properties from the register classes.
226
228
230 MVT::i1, Promote);
231 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
233 MVT::i1, Promote);
234
235 // TODO: add all necessary setOperationAction calls.
237
242
247 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
250 }
251
253
256
258
260
261 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
262 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
263 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
264
265 if (Subtarget.is64Bit()) {
267
270 MVT::i32, Custom);
272 if (!Subtarget.hasStdExtZbb())
275 Custom);
277 }
278 if (!Subtarget.hasStdExtZmmul()) {
280 } else if (Subtarget.is64Bit()) {
283 } else {
285 }
286
287 if (!Subtarget.hasStdExtM()) {
289 Expand);
290 } else if (Subtarget.is64Bit()) {
292 {MVT::i8, MVT::i16, MVT::i32}, Custom);
293 }
294
297 Expand);
298
300 Custom);
301
302 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
303 if (Subtarget.is64Bit())
305 } else if (Subtarget.hasVendorXTHeadBb()) {
306 if (Subtarget.is64Bit())
309 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
311 } else {
313 }
314
315 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
316 // pattern match it directly in isel.
318 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
319 Subtarget.hasVendorXTHeadBb())
320 ? Legal
321 : Expand);
322
323 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
325 } else {
326 // Zbkb can use rev8+brev8 to implement bitreverse.
328 Subtarget.hasStdExtZbkb() ? Custom : Expand);
329 }
330
331 if (Subtarget.hasStdExtZbb() ||
332 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
334 Legal);
335 }
336
337 if (Subtarget.hasStdExtZbb() ||
338 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
339 if (Subtarget.is64Bit())
341 } else {
343 }
344
345 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
346 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
347 // We need the custom lowering to make sure that the resulting sequence
348 // for the 32-bit case is efficient on 64-bit targets.
349 if (Subtarget.is64Bit())
351 } else {
353 }
354
355 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
357 } else if (Subtarget.hasShortForwardBranchOpt()) {
358 // We can use PseudoCCSUB to implement ABS.
360 } else if (Subtarget.is64Bit()) {
362 }
363
364 if (!Subtarget.hasVendorXTHeadCondMov())
366
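  // Node, condition-code and rounding-mode sets shared by the scalar
  // f16/f32/f64 configuration below.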
367 static const unsigned FPLegalNodeTypes[] = {
374
375 static const ISD::CondCode FPCCToExpand[] = {
379
380 static const unsigned FPOpToExpand[] = {
382 ISD::FREM};
383
384 static const unsigned FPRndMode[] = {
387
388 if (Subtarget.hasStdExtZfhminOrZhinxmin())
390
391 static const unsigned ZfhminZfbfminPromoteOps[] = {
401
402 if (Subtarget.hasStdExtZfbfmin()) {
411 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
413 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
414 // DAGCombiner::visitFP_ROUND probably needs improvements first.
416 }
417
418 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
419 if (Subtarget.hasStdExtZfhOrZhinx()) {
420 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
421 setOperationAction(FPRndMode, MVT::f16,
422 Subtarget.hasStdExtZfa() ? Legal : Custom);
425 } else {
426 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
429 MVT::f16, Legal);
430 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
431 // DAGCombiner::visitFP_ROUND probably needs improvements first.
433 }
434
437 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
440
442 Subtarget.hasStdExtZfa() ? Legal : Promote);
447 MVT::f16, Promote);
448
449 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
450 // complete support for all operations in LegalizeDAG.
455 MVT::f16, Promote);
456
457 // We need to custom promote this.
458 if (Subtarget.is64Bit())
460
462 Subtarget.hasStdExtZfa() ? Legal : Custom);
463 }
464
465 if (Subtarget.hasStdExtFOrZfinx()) {
466 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
467 setOperationAction(FPRndMode, MVT::f32,
468 Subtarget.hasStdExtZfa() ? Legal : Custom);
469 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
473 setOperationAction(FPOpToExpand, MVT::f32, Expand);
474 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
475 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
476 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
477 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
481 Subtarget.isSoftFPABI() ? LibCall : Custom);
484
485 if (Subtarget.hasStdExtZfa()) {
488 } else {
490 }
491 }
492
493 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
495
496 if (Subtarget.hasStdExtDOrZdinx()) {
497 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
498
499 if (!Subtarget.is64Bit())
501
502 if (Subtarget.hasStdExtZfa()) {
503 setOperationAction(FPRndMode, MVT::f64, Legal);
506 } else {
507 if (Subtarget.is64Bit())
508 setOperationAction(FPRndMode, MVT::f64, Custom);
509
511 }
512
515 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
519 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
520 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
521 setOperationAction(FPOpToExpand, MVT::f64, Expand);
522 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
523 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
524 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
525 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
529 Subtarget.isSoftFPABI() ? LibCall : Custom);
532 }
533
534 if (Subtarget.is64Bit()) {
537 MVT::i32, Custom);
539 }
540
541 if (Subtarget.hasStdExtFOrZfinx()) {
543 Custom);
544
547 XLenVT, Legal);
548
551 }
552
555 XLenVT, Custom);
556
558
559 if (Subtarget.is64Bit())
561
562 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
563 // Unfortunately this can't be determined just from the ISA naming string.
565 Subtarget.is64Bit() ? Legal : Custom);
567 Subtarget.is64Bit() ? Legal : Custom);
568
571 if (Subtarget.is64Bit())
573
574 if (Subtarget.hasStdExtZicbop()) {
576 }
577
578 if (Subtarget.hasStdExtA()) {
580 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
582 else
584 } else if (Subtarget.hasForcedAtomics()) {
586 } else {
588 }
589
591
593
594 if (getTargetMachine().getTargetTriple().isOSLinux()) {
595 // Custom lowering of llvm.clear_cache.
597 }
598
599 if (Subtarget.hasVInstructions()) {
601
603
604 // RVV intrinsics may have illegal operands.
605 // We also need to custom legalize vmv.x.s.
608 {MVT::i8, MVT::i16}, Custom);
609 if (Subtarget.is64Bit())
611 MVT::i32, Custom);
612 else
614 MVT::i64, Custom);
615
617 MVT::Other, Custom);
618
619 static const unsigned IntegerVPOps[] = {
620 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
621 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
622 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
623 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
624 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
625 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
626 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
627 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
628 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
629 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
630 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
631 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
632 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
633 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
634 ISD::EXPERIMENTAL_VP_SPLAT};
635
636 static const unsigned FloatingPointVPOps[] = {
637 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
638 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
639 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
640 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
641 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
642 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
643 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
644 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
645 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
646 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
647 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
648 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
649 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
650 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
651
652 static const unsigned IntegerVecReduceOps[] = {
656
657 static const unsigned FloatingPointVecReduceOps[] = {
660
661 if (!Subtarget.is64Bit()) {
662 // We must custom-lower certain vXi64 operations on RV32 due to the vector
663 // element type being illegal.
665 MVT::i64, Custom);
666
667 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
668
669 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
670 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
671 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
672 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
673 MVT::i64, Custom);
674 }
675
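  // Configure the scalable mask (i1) vector types. Masks support few
  // operations natively, so most are custom-lowered or expanded.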
676 for (MVT VT : BoolVecVTs) {
677 if (!isTypeLegal(VT))
678 continue;
679
681
682 // Mask VTs are custom-expanded into a series of standard nodes
686 VT, Custom);
687
689 Custom);
690
693 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
694 Expand);
695
696 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
697 Custom);
698
699 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
700
703 Custom);
704
706 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
707 Custom);
708
709 // RVV has native int->float & float->int conversions where the
710 // element type sizes are within one power-of-two of each other. Any
711 // wider distances between type sizes have to be lowered as sequences
712 // which progressively narrow the gap in stages.
717 VT, Custom);
719 Custom);
720
721 // Expand all extending loads to types larger than this, and truncating
722 // stores from types larger than this.
724 setTruncStoreAction(VT, OtherVT, Expand);
726 OtherVT, Expand);
727 }
728
729 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
730 ISD::VP_TRUNCATE, ISD::VP_SETCC},
731 VT, Custom);
732
735
737
738 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
739 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
740
743 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
744 }
745
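  // Configure the scalable integer vector types.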
746 for (MVT VT : IntVecVTs) {
747 if (!isTypeLegal(VT))
748 continue;
749
752
753 // Vectors implement MULHS/MULHU.
755
756 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
757 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
759
761 Legal);
762
764
765 // Custom-lower extensions and truncations from/to mask types.
767 VT, Custom);
768
769 // RVV has native int->float & float->int conversions where the
770 // element type sizes are within one power-of-two of each other. Any
771 // wider distances between type sizes have to be lowered as sequences
772 // which progressively narrow the gap in stages.
777 VT, Custom);
779 Custom);
783 VT, Legal);
784
785 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
786 // nodes which truncate by one power of two at a time.
788
789 // Custom-lower insert/extract operations to simplify patterns.
791 Custom);
792
793 // Custom-lower reduction operations to set up the corresponding custom
794 // nodes' operands.
795 setOperationAction(IntegerVecReduceOps, VT, Custom);
796
797 setOperationAction(IntegerVPOps, VT, Custom);
798
800
802 VT, Custom);
803
805 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
806 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
807 VT, Custom);
808
811 VT, Custom);
812
815
817
819 setTruncStoreAction(VT, OtherVT, Expand);
821 OtherVT, Expand);
822 }
823
826
827 // Splice
829
830 if (Subtarget.hasStdExtZvkb()) {
832 setOperationAction(ISD::VP_BSWAP, VT, Custom);
833 } else {
834 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
836 }
837
838 if (Subtarget.hasStdExtZvbb()) {
840 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
841 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
842 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
843 VT, Custom);
844 } else {
845 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
847 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
848 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
849 VT, Expand);
850
851 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
852 // in the range of f32.
853 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
854 if (isTypeLegal(FloatVT)) {
856 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
857 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
858 VT, Custom);
859 }
860 }
861 }
862
863 // Expand various CCs to best match the RVV ISA, which natively supports UNE
864 // but no other unordered comparisons, and supports all ordered comparisons
865 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
866 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
867 // and we pattern-match those back to the "original", swapping operands once
868 // more. This way we catch both operations and both "vf" and "fv" forms with
869 // fewer patterns.
870 static const ISD::CondCode VFPCCToExpand[] = {
874 };
875
876 // TODO: support more ops.
877 static const unsigned ZvfhminPromoteOps[] = {
885
886 // TODO: support more vp ops.
887 static const unsigned ZvfhminPromoteVPOps[] = {
888 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
889 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
890 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
891 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
892 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
893 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
894 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
895 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
896 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
897
898 // Sets common operation actions on RVV floating-point vector types.
899 const auto SetCommonVFPActions = [&](MVT VT) {
901 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
902 // sizes are within one power-of-two of each other. Therefore conversions
903 // between vXf16 and vXf64 must be lowered as sequences which convert via
904 // vXf32.
907 // Custom-lower insert/extract operations to simplify patterns.
909 Custom);
910 // Expand various condition codes (explained above).
911 setCondCodeAction(VFPCCToExpand, VT, Expand);
912
915
919 VT, Custom);
920
921 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
922
923 // Expand FP operations that need libcalls.
935
937
939
941 VT, Custom);
942
944 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
945 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
946 VT, Custom);
947
950
953 VT, Custom);
954
957
959
960 setOperationAction(FloatingPointVPOps, VT, Custom);
961
963 Custom);
966 VT, Legal);
971 VT, Custom);
972 };
973
974 // Sets common extload/truncstore actions on RVV floating-point vector
975 // types.
976 const auto SetCommonVFPExtLoadTruncStoreActions =
977 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
978 for (auto SmallVT : SmallerVTs) {
979 setTruncStoreAction(VT, SmallVT, Expand);
980 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
981 }
982 };
983
984 if (Subtarget.hasVInstructionsF16()) {
985 for (MVT VT : F16VecVTs) {
986 if (!isTypeLegal(VT))
987 continue;
988 SetCommonVFPActions(VT);
989 }
990 } else if (Subtarget.hasVInstructionsF16Minimal()) {
991 for (MVT VT : F16VecVTs) {
992 if (!isTypeLegal(VT))
993 continue;
996 Custom);
997 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
998 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
999 Custom);
1002 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1003 VT, Custom);
1006 VT, Custom);
1007 if (Subtarget.hasStdExtZfhmin())
1009 // load/store
1011
1012 // Custom split nxv32f16 since nxv32f32 is not legal.
1013 if (VT == MVT::nxv32f16) {
1014 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1015 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1016 continue;
1017 }
1018 // Add more promote ops.
1019 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1020 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1021 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1022 }
1023 }
1024
1025 // TODO: Could we merge some code with zvfhmin?
1026 if (Subtarget.hasVInstructionsBF16Minimal()) {
1027 for (MVT VT : BF16VecVTs) {
1028 if (!isTypeLegal(VT))
1029 continue;
1031 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1033 Custom);
1036 VT, Custom);
1038 if (Subtarget.hasStdExtZfbfmin())
1040 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1041 Custom);
1043 // TODO: Promote to fp32.
1044 }
1045 }
1046
1047 if (Subtarget.hasVInstructionsF32()) {
1048 for (MVT VT : F32VecVTs) {
1049 if (!isTypeLegal(VT))
1050 continue;
1051 SetCommonVFPActions(VT);
1052 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1053 }
1054 }
1055
1056 if (Subtarget.hasVInstructionsF64()) {
1057 for (MVT VT : F64VecVTs) {
1058 if (!isTypeLegal(VT))
1059 continue;
1060 SetCommonVFPActions(VT);
1061 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1062 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1063 }
1064 }
1065
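  // Fixed-length vectors are lowered by operating on an equivalently sized
  // scalable container type, so most actions here are Custom.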
1066 if (Subtarget.useRVVForFixedLengthVectors()) {
1068 if (!useRVVForFixedLengthVectorVT(VT))
1069 continue;
1070
1071 // By default everything must be expanded.
1072 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1075 setTruncStoreAction(VT, OtherVT, Expand);
1077 OtherVT, Expand);
1078 }
1079
1080 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1081 // expansion to a build_vector of 0s.
1083
1084 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1086 Custom);
1087
1089 Custom);
1090
1092 VT, Custom);
1093
1095
1097
1099
1101
1103
1105
1108 Custom);
1109
1111 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1112 Custom);
1113
1115 {
1124 },
1125 VT, Custom);
1127 Custom);
1128
1130
1131 // Operations below are different between masks and other vectors.
1132 if (VT.getVectorElementType() == MVT::i1) {
1133 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1134 ISD::OR, ISD::XOR},
1135 VT, Custom);
1136
1137 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1138 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1139 VT, Custom);
1140
1141 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1142 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1143 continue;
1144 }
1145
1146 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1147 // it before type legalization for i64 vectors on RV32. It will then be
1148 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1149 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1150 // improvements first.
1151 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1154 }
1155
1158
1159 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1160 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1161 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1162 ISD::VP_SCATTER},
1163 VT, Custom);
1164
1168 VT, Custom);
1169
1172
1174
1175 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1176 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1178
1182 VT, Custom);
1183
1185
1188
1189 // Custom-lower reduction operations to set up the corresponding custom
1190 // nodes' operands.
1194 VT, Custom);
1195
1196 setOperationAction(IntegerVPOps, VT, Custom);
1197
1198 if (Subtarget.hasStdExtZvkb())
1200
1201 if (Subtarget.hasStdExtZvbb()) {
1204 VT, Custom);
1205 } else {
1206 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1207 // in the range of f32.
1208 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1209 if (isTypeLegal(FloatVT))
1212 Custom);
1213 }
1214 }
1215
1217 // There are no extending loads or truncating stores.
1218 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1219 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1220 setTruncStoreAction(VT, InnerVT, Expand);
1221 }
1222
1223 if (!useRVVForFixedLengthVectorVT(VT))
1224 continue;
1225
1226 // By default everything must be expanded.
1227 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1229
1230 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1231 // expansion to a build_vector of 0s.
1233
1236 VT, Custom);
1237
1238 // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
1239 // vp_stride_load/store, vp_gather/scatter can be hoisted to here.
1241
1244 Custom);
1245
1246 if (VT.getVectorElementType() == MVT::f16 &&
1247 !Subtarget.hasVInstructionsF16()) {
1248 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1250 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1251 Custom);
1253 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1254 VT, Custom);
1256 if (Subtarget.hasStdExtZfhmin()) {
1257 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1259 } else {
1260 // We need to custom legalize f16 build vectors if Zfhmin isn't
1261 // available.
1263 }
1264 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1265 // Don't promote f16 vector operations to f32 if f32 vector type is
1266 // not legal.
1267 // TODO: could split the f16 vector into two vectors and do promotion.
1268 if (!isTypeLegal(F32VecVT))
1269 continue;
1270 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1271 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1272 continue;
1273 }
1274
1275 if (VT.getVectorElementType() == MVT::bf16) {
1276 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1277 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1280 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1281 Custom);
1282 // TODO: Promote to fp32.
1283 continue;
1284 }
1285
1288 VT, Custom);
1289
1292
1293 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1294 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1295 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1296 ISD::VP_SCATTER},
1297 VT, Custom);
1298
1303 VT, Custom);
1304
1307 VT, Custom);
1308
1309 setCondCodeAction(VFPCCToExpand, VT, Expand);
1310
1313
1315
1316 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1317
1318 setOperationAction(FloatingPointVPOps, VT, Custom);
1319
1326 VT, Custom);
1327 }
1328
1329 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1330 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1331 Custom);
1332 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1334 if (Subtarget.hasStdExtFOrZfinx())
1336 if (Subtarget.hasStdExtDOrZdinx())
1338 }
1339 }
1340
1341 if (Subtarget.hasStdExtA())
1343
1344 if (Subtarget.hasForcedAtomics()) {
1345 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1351 XLenVT, LibCall);
1352 }
1353
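  // XTHeadMemIdx provides pre- and post-increment addressing for scalar
  // loads and stores.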
1354 if (Subtarget.hasVendorXTHeadMemIdx()) {
1355 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1356 setIndexedLoadAction(im, MVT::i8, Legal);
1357 setIndexedStoreAction(im, MVT::i8, Legal);
1358 setIndexedLoadAction(im, MVT::i16, Legal);
1359 setIndexedStoreAction(im, MVT::i16, Legal);
1360 setIndexedLoadAction(im, MVT::i32, Legal);
1361 setIndexedStoreAction(im, MVT::i32, Legal);
1362
1363 if (Subtarget.is64Bit()) {
1364 setIndexedLoadAction(im, MVT::i64, Legal);
1365 setIndexedStoreAction(im, MVT::i64, Legal);
1366 }
1367 }
1368 }
1369
1370 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1374
1378 }
1379
1380 // Function alignments.
1381 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1382 setMinFunctionAlignment(FunctionAlignment);
1383 // Set preferred alignments.
1386
1391
1392 if (Subtarget.hasStdExtFOrZfinx())
1394
1395 if (Subtarget.hasStdExtZbb())
1397
1398 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1399 Subtarget.hasVInstructions())
1401
1402 if (Subtarget.hasStdExtZbkb())
1404 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1406 if (Subtarget.hasStdExtFOrZfinx())
1409 if (Subtarget.hasVInstructions())
1411 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1414 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1417 if (Subtarget.hasVendorXTHeadMemPair())
1419 if (Subtarget.useRVVForFixedLengthVectors())
1421
1422 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1423 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1424
1425 // Disable strict node mutation.
1426 IsStrictFPEnabled = true;
1427
1428 // Let the subtarget decide if a predictable select is more expensive than the
1429 // corresponding branch. This information is used in CGP/SelectOpt to decide
1430 // when to convert selects into branches.
1431 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1432}
1433
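// SetCC results are XLenVT for scalar types and i1 mask vectors for vector
// types that are handled by RVV.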
1435 LLVMContext &Context,
1436 EVT VT) const {
1437 if (!VT.isVector())
1438 return getPointerTy(DL);
1439 if (Subtarget.hasVInstructions() &&
1440 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1441 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1443}
1444
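// The explicit vector length (EVL) operand of VP operations is an XLen-sized
// scalar.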
1445MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1446 return Subtarget.getXLenVT();
1447}
1448
1449// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1450bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1451 unsigned VF,
1452 bool IsScalable) const {
1453 if (!Subtarget.hasVInstructions())
1454 return true;
1455
1456 if (!IsScalable)
1457 return true;
1458
1459 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1460 return true;
1461
1462 // Don't allow VF=1 if those types aren't legal.
1463 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1464 return true;
1465
1466 // VLEN=32 support is incomplete.
1467 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1468 return true;
1469
1470 // The maximum VF is for the smallest element width with LMUL=8.
1471 // VF must be a power of 2.
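 1471 // With RVVBitsPerBlock == 64 this evaluates to (64 / 8) * 8 == 64.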
1472 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1473 return VF > MaxVF || !isPowerOf2_32(VF);
1474}
1475
1477 return !Subtarget.hasVInstructions() ||
1478 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1479}
1480
1482 const CallInst &I,
1483 MachineFunction &MF,
1484 unsigned Intrinsic) const {
1485 auto &DL = I.getDataLayout();
1486
1487 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1488 bool IsUnitStrided, bool UsePtrVal = false) {
1490 // We can't use ptrVal if the intrinsic can access memory before the
1491 // pointer. This means we can't use it for strided or indexed intrinsics.
1492 if (UsePtrVal)
1493 Info.ptrVal = I.getArgOperand(PtrOp);
1494 else
1495 Info.fallbackAddressSpace =
1496 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1497 Type *MemTy;
1498 if (IsStore) {
1499 // Store value is the first operand.
1500 MemTy = I.getArgOperand(0)->getType();
1501 } else {
1502 // Use the return type. If it's a segment load, the return type is a struct.
1503 MemTy = I.getType();
1504 if (MemTy->isStructTy())
1505 MemTy = MemTy->getStructElementType(0);
1506 }
1507 if (!IsUnitStrided)
1508 MemTy = MemTy->getScalarType();
1509
1510 Info.memVT = getValueType(DL, MemTy);
1511 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1513 Info.flags |=
1515 return true;
1516 };
1517
1518 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1520
1522 switch (Intrinsic) {
1523 default:
1524 return false;
1525 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1526 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1527 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1528 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1529 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1530 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1531 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1532 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1533 case Intrinsic::riscv_masked_cmpxchg_i32:
1535 Info.memVT = MVT::i32;
1536 Info.ptrVal = I.getArgOperand(0);
1537 Info.offset = 0;
1538 Info.align = Align(4);
1541 return true;
1542 case Intrinsic::riscv_seg2_load:
1543 case Intrinsic::riscv_seg3_load:
1544 case Intrinsic::riscv_seg4_load:
1545 case Intrinsic::riscv_seg5_load:
1546 case Intrinsic::riscv_seg6_load:
1547 case Intrinsic::riscv_seg7_load:
1548 case Intrinsic::riscv_seg8_load:
1549 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1550 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1551 case Intrinsic::riscv_seg2_store:
1552 case Intrinsic::riscv_seg3_store:
1553 case Intrinsic::riscv_seg4_store:
1554 case Intrinsic::riscv_seg5_store:
1555 case Intrinsic::riscv_seg6_store:
1556 case Intrinsic::riscv_seg7_store:
1557 case Intrinsic::riscv_seg8_store:
1558 // Operands are (vec, ..., vec, ptr, vl)
1559 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1560 /*IsStore*/ true,
1561 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1562 case Intrinsic::riscv_vle:
1563 case Intrinsic::riscv_vle_mask:
1564 case Intrinsic::riscv_vleff:
1565 case Intrinsic::riscv_vleff_mask:
1566 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1567 /*IsStore*/ false,
1568 /*IsUnitStrided*/ true,
1569 /*UsePtrVal*/ true);
1570 case Intrinsic::riscv_vse:
1571 case Intrinsic::riscv_vse_mask:
1572 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1573 /*IsStore*/ true,
1574 /*IsUnitStrided*/ true,
1575 /*UsePtrVal*/ true);
1576 case Intrinsic::riscv_vlse:
1577 case Intrinsic::riscv_vlse_mask:
1578 case Intrinsic::riscv_vloxei:
1579 case Intrinsic::riscv_vloxei_mask:
1580 case Intrinsic::riscv_vluxei:
1581 case Intrinsic::riscv_vluxei_mask:
1582 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1583 /*IsStore*/ false,
1584 /*IsUnitStrided*/ false);
1585 case Intrinsic::riscv_vsse:
1586 case Intrinsic::riscv_vsse_mask:
1587 case Intrinsic::riscv_vsoxei:
1588 case Intrinsic::riscv_vsoxei_mask:
1589 case Intrinsic::riscv_vsuxei:
1590 case Intrinsic::riscv_vsuxei_mask:
1591 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1592 /*IsStore*/ true,
1593 /*IsUnitStrided*/ false);
1594 case Intrinsic::riscv_vlseg2:
1595 case Intrinsic::riscv_vlseg3:
1596 case Intrinsic::riscv_vlseg4:
1597 case Intrinsic::riscv_vlseg5:
1598 case Intrinsic::riscv_vlseg6:
1599 case Intrinsic::riscv_vlseg7:
1600 case Intrinsic::riscv_vlseg8:
1601 case Intrinsic::riscv_vlseg2ff:
1602 case Intrinsic::riscv_vlseg3ff:
1603 case Intrinsic::riscv_vlseg4ff:
1604 case Intrinsic::riscv_vlseg5ff:
1605 case Intrinsic::riscv_vlseg6ff:
1606 case Intrinsic::riscv_vlseg7ff:
1607 case Intrinsic::riscv_vlseg8ff:
1608 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1609 /*IsStore*/ false,
1610 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1611 case Intrinsic::riscv_vlseg2_mask:
1612 case Intrinsic::riscv_vlseg3_mask:
1613 case Intrinsic::riscv_vlseg4_mask:
1614 case Intrinsic::riscv_vlseg5_mask:
1615 case Intrinsic::riscv_vlseg6_mask:
1616 case Intrinsic::riscv_vlseg7_mask:
1617 case Intrinsic::riscv_vlseg8_mask:
1618 case Intrinsic::riscv_vlseg2ff_mask:
1619 case Intrinsic::riscv_vlseg3ff_mask:
1620 case Intrinsic::riscv_vlseg4ff_mask:
1621 case Intrinsic::riscv_vlseg5ff_mask:
1622 case Intrinsic::riscv_vlseg6ff_mask:
1623 case Intrinsic::riscv_vlseg7ff_mask:
1624 case Intrinsic::riscv_vlseg8ff_mask:
1625 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1626 /*IsStore*/ false,
1627 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1628 case Intrinsic::riscv_vlsseg2:
1629 case Intrinsic::riscv_vlsseg3:
1630 case Intrinsic::riscv_vlsseg4:
1631 case Intrinsic::riscv_vlsseg5:
1632 case Intrinsic::riscv_vlsseg6:
1633 case Intrinsic::riscv_vlsseg7:
1634 case Intrinsic::riscv_vlsseg8:
1635 case Intrinsic::riscv_vloxseg2:
1636 case Intrinsic::riscv_vloxseg3:
1637 case Intrinsic::riscv_vloxseg4:
1638 case Intrinsic::riscv_vloxseg5:
1639 case Intrinsic::riscv_vloxseg6:
1640 case Intrinsic::riscv_vloxseg7:
1641 case Intrinsic::riscv_vloxseg8:
1642 case Intrinsic::riscv_vluxseg2:
1643 case Intrinsic::riscv_vluxseg3:
1644 case Intrinsic::riscv_vluxseg4:
1645 case Intrinsic::riscv_vluxseg5:
1646 case Intrinsic::riscv_vluxseg6:
1647 case Intrinsic::riscv_vluxseg7:
1648 case Intrinsic::riscv_vluxseg8:
1649 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1650 /*IsStore*/ false,
1651 /*IsUnitStrided*/ false);
1652 case Intrinsic::riscv_vlsseg2_mask:
1653 case Intrinsic::riscv_vlsseg3_mask:
1654 case Intrinsic::riscv_vlsseg4_mask:
1655 case Intrinsic::riscv_vlsseg5_mask:
1656 case Intrinsic::riscv_vlsseg6_mask:
1657 case Intrinsic::riscv_vlsseg7_mask:
1658 case Intrinsic::riscv_vlsseg8_mask:
1659 case Intrinsic::riscv_vloxseg2_mask:
1660 case Intrinsic::riscv_vloxseg3_mask:
1661 case Intrinsic::riscv_vloxseg4_mask:
1662 case Intrinsic::riscv_vloxseg5_mask:
1663 case Intrinsic::riscv_vloxseg6_mask:
1664 case Intrinsic::riscv_vloxseg7_mask:
1665 case Intrinsic::riscv_vloxseg8_mask:
1666 case Intrinsic::riscv_vluxseg2_mask:
1667 case Intrinsic::riscv_vluxseg3_mask:
1668 case Intrinsic::riscv_vluxseg4_mask:
1669 case Intrinsic::riscv_vluxseg5_mask:
1670 case Intrinsic::riscv_vluxseg6_mask:
1671 case Intrinsic::riscv_vluxseg7_mask:
1672 case Intrinsic::riscv_vluxseg8_mask:
1673 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1674 /*IsStore*/ false,
1675 /*IsUnitStrided*/ false);
1676 case Intrinsic::riscv_vsseg2:
1677 case Intrinsic::riscv_vsseg3:
1678 case Intrinsic::riscv_vsseg4:
1679 case Intrinsic::riscv_vsseg5:
1680 case Intrinsic::riscv_vsseg6:
1681 case Intrinsic::riscv_vsseg7:
1682 case Intrinsic::riscv_vsseg8:
1683 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1684 /*IsStore*/ true,
1685 /*IsUnitStrided*/ false);
1686 case Intrinsic::riscv_vsseg2_mask:
1687 case Intrinsic::riscv_vsseg3_mask:
1688 case Intrinsic::riscv_vsseg4_mask:
1689 case Intrinsic::riscv_vsseg5_mask:
1690 case Intrinsic::riscv_vsseg6_mask:
1691 case Intrinsic::riscv_vsseg7_mask:
1692 case Intrinsic::riscv_vsseg8_mask:
1693 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1694 /*IsStore*/ true,
1695 /*IsUnitStrided*/ false);
1696 case Intrinsic::riscv_vssseg2:
1697 case Intrinsic::riscv_vssseg3:
1698 case Intrinsic::riscv_vssseg4:
1699 case Intrinsic::riscv_vssseg5:
1700 case Intrinsic::riscv_vssseg6:
1701 case Intrinsic::riscv_vssseg7:
1702 case Intrinsic::riscv_vssseg8:
1703 case Intrinsic::riscv_vsoxseg2:
1704 case Intrinsic::riscv_vsoxseg3:
1705 case Intrinsic::riscv_vsoxseg4:
1706 case Intrinsic::riscv_vsoxseg5:
1707 case Intrinsic::riscv_vsoxseg6:
1708 case Intrinsic::riscv_vsoxseg7:
1709 case Intrinsic::riscv_vsoxseg8:
1710 case Intrinsic::riscv_vsuxseg2:
1711 case Intrinsic::riscv_vsuxseg3:
1712 case Intrinsic::riscv_vsuxseg4:
1713 case Intrinsic::riscv_vsuxseg5:
1714 case Intrinsic::riscv_vsuxseg6:
1715 case Intrinsic::riscv_vsuxseg7:
1716 case Intrinsic::riscv_vsuxseg8:
1717 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1718 /*IsStore*/ true,
1719 /*IsUnitStrided*/ false);
1720 case Intrinsic::riscv_vssseg2_mask:
1721 case Intrinsic::riscv_vssseg3_mask:
1722 case Intrinsic::riscv_vssseg4_mask:
1723 case Intrinsic::riscv_vssseg5_mask:
1724 case Intrinsic::riscv_vssseg6_mask:
1725 case Intrinsic::riscv_vssseg7_mask:
1726 case Intrinsic::riscv_vssseg8_mask:
1727 case Intrinsic::riscv_vsoxseg2_mask:
1728 case Intrinsic::riscv_vsoxseg3_mask:
1729 case Intrinsic::riscv_vsoxseg4_mask:
1730 case Intrinsic::riscv_vsoxseg5_mask:
1731 case Intrinsic::riscv_vsoxseg6_mask:
1732 case Intrinsic::riscv_vsoxseg7_mask:
1733 case Intrinsic::riscv_vsoxseg8_mask:
1734 case Intrinsic::riscv_vsuxseg2_mask:
1735 case Intrinsic::riscv_vsuxseg3_mask:
1736 case Intrinsic::riscv_vsuxseg4_mask:
1737 case Intrinsic::riscv_vsuxseg5_mask:
1738 case Intrinsic::riscv_vsuxseg6_mask:
1739 case Intrinsic::riscv_vsuxseg7_mask:
1740 case Intrinsic::riscv_vsuxseg8_mask:
1741 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1742 /*IsStore*/ true,
1743 /*IsUnitStrided*/ false);
1744 }
1745}
1746
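// Return true if the addressing mode represented by AM is legal for this
// target, for a load/store of the specified type.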
1748 const AddrMode &AM, Type *Ty,
1749 unsigned AS,
1750 Instruction *I) const {
1751 // No global is ever allowed as a base.
1752 if (AM.BaseGV)
1753 return false;
1754
1755 // None of our addressing modes allows a scalable offset
1756 if (AM.ScalableOffset)
1757 return false;
1758
1759 // RVV instructions only support register addressing.
1760 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1761 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1762
1763 // Require a 12-bit signed offset.
1764 if (!isInt<12>(AM.BaseOffs))
1765 return false;
1766
1767 switch (AM.Scale) {
1768 case 0: // "r+i" or just "i", depending on HasBaseReg.
1769 break;
1770 case 1:
1771 if (!AM.HasBaseReg) // allow "r+i".
1772 break;
1773 return false; // disallow "r+r" or "r+r+i".
1774 default:
1775 return false;
1776 }
1777
1778 return true;
1779}
1780
1782 return isInt<12>(Imm);
1783}
1784
1786 return isInt<12>(Imm);
1787}
1788
1789// On RV32, 64-bit integers are split into their high and low parts and held
1790// in two different registers, so the trunc is free since the low register can
1791// just be used.
1792// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1793// isTruncateFree?
1795 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1796 return false;
1797 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1798 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1799 return (SrcBits == 64 && DestBits == 32);
1800}
1801
1803 // We consider i64->i32 free on RV64 since we have good selection of W
1804 // instructions that make promoting operations back to i64 free in many cases.
1805 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1806 !DstVT.isInteger())
1807 return false;
1808 unsigned SrcBits = SrcVT.getSizeInBits();
1809 unsigned DestBits = DstVT.getSizeInBits();
1810 return (SrcBits == 64 && DestBits == 32);
1811}
1812
1814 EVT SrcVT = Val.getValueType();
1815 // Free truncate from vnsrl and vnsra.
1816 if (Subtarget.hasVInstructions() &&
1817 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1818 SrcVT.isVector() && VT2.isVector()) {
1819 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1820 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
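    // vnsrl/vnsra narrow by exactly one power of two, so the truncate is only
    // free when the destination elements are half the width of the source.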
1821 if (SrcBits == DestBits * 2) {
1822 return true;
1823 }
1824 }
1825 return TargetLowering::isTruncateFree(Val, VT2);
1826}
1827
1829 // Zexts are free if they can be combined with a load.
1830 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1831 // poorly with type legalization of compares preferring sext.
1832 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1833 EVT MemVT = LD->getMemoryVT();
1834 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1835 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1836 LD->getExtensionType() == ISD::ZEXTLOAD))
1837 return true;
1838 }
1839
1840 return TargetLowering::isZExtFree(Val, VT2);
1841}
1842
1844 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1845}
1846
1848 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1849}
1850
1852 return Subtarget.hasStdExtZbb() ||
1853 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1854}
1855
1857 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1858 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1859}
1860
1862 const Instruction &AndI) const {
1863 // We expect to be able to match a bit extraction instruction if the Zbs
1864 // extension is supported and the mask is a power of two. However, we
1865 // conservatively return false if the mask would fit in an ANDI instruction,
1866 // on the basis that it's possible the sinking+duplication of the AND in
1867 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1868 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1869 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1870 return false;
1871 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1872 if (!Mask)
1873 return false;
1874 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1875}
1876
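// With Zbb/Zbkb, ANDN/ORN make (and/or x, (not y)) cheap. Non-opaque constants
// are excluded since the inverted constant can simply be materialized instead.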
1878 EVT VT = Y.getValueType();
1879
1880 // FIXME: Support vectors once we have tests.
1881 if (VT.isVector())
1882 return false;
1883
1884 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1885 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
1886}
1887
1889 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1890 if (Subtarget.hasStdExtZbs())
1891 return X.getValueType().isScalarInteger();
1892 auto *C = dyn_cast<ConstantSDNode>(Y);
1893 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1894 if (Subtarget.hasVendorXTHeadBs())
1895 return C != nullptr;
1896 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
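 1897 // The mask (1 << Y) must fit in ANDI's 12-bit signed immediate, hence Y <= 10.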
1897 return C && C->getAPIntValue().ule(10);
1898}
1899
1901 EVT VT) const {
1902 // Only enable for RVV.
1903 if (!VT.isVector() || !Subtarget.hasVInstructions())
1904 return false;
1905
1906 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1907 return false;
1908
1909 return true;
1910}
1911
1913 Type *Ty) const {
1914 assert(Ty->isIntegerTy());
1915
1916 unsigned BitSize = Ty->getIntegerBitWidth();
1917 if (BitSize > Subtarget.getXLen())
1918 return false;
1919
1920 // Fast path, assume 32-bit immediates are cheap.
1921 int64_t Val = Imm.getSExtValue();
1922 if (isInt<32>(Val))
1923 return true;
1924
1925 // A constant pool entry may be more aligned than the load we're trying to
1926 // replace. If we don't support unaligned scalar mem, prefer the constant
1927 // pool.
1928 // TODO: Can the caller pass down the alignment?
1929 if (!Subtarget.enableUnalignedScalarMem())
1930 return true;
1931
1932 // Prefer to keep the load if it would require many instructions.
1933 // This uses the same threshold we use for constant pools but doesn't
1934 // check useConstantPoolForLargeInts.
1935 // TODO: Should we keep the load only when we're definitely going to emit a
1936 // constant pool?
1937
1939 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1940}
1941
1945 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1946 SelectionDAG &DAG) const {
1947 // One interesting pattern that we'd want to form is 'bit extract':
1948 // ((1 >> Y) & 1) ==/!= 0
1949 // But we also need to be careful not to try to reverse that fold.
1950
1951 // Is this '((1 >> Y) & 1)'?
1952 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1953 return false; // Keep the 'bit extract' pattern.
1954
1955 // Will this be '((1 >> Y) & 1)' after the transform?
1956 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1957 return true; // Do form the 'bit extract' pattern.
1958
1959 // If 'X' is a constant, and we transform, then we will immediately
1960 // try to undo the fold, thus causing an endless combine loop.
1961 // So only do the transform if X is not a constant. This matches the default
1962 // implementation of this function.
1963 return !XC;
1964}
1965
1966bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1967 switch (Opcode) {
1968 case Instruction::Add:
1969 case Instruction::Sub:
1970 case Instruction::Mul:
1971 case Instruction::And:
1972 case Instruction::Or:
1973 case Instruction::Xor:
1974 case Instruction::FAdd:
1975 case Instruction::FSub:
1976 case Instruction::FMul:
1977 case Instruction::FDiv:
1978 case Instruction::ICmp:
1979 case Instruction::FCmp:
1980 return true;
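  // These map to vector-scalar (.vx) forms where only the second source can be
  // a scalar (shift amount, divisor, etc.), so only operand 1 benefits.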
1981 case Instruction::Shl:
1982 case Instruction::LShr:
1983 case Instruction::AShr:
1984 case Instruction::UDiv:
1985 case Instruction::SDiv:
1986 case Instruction::URem:
1987 case Instruction::SRem:
1988 case Instruction::Select:
1989 return Operand == 1;
1990 default:
1991 return false;
1992 }
1993}
1994
1995
1997 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1998 return false;
1999
2000 if (canSplatOperand(I->getOpcode(), Operand))
2001 return true;
2002
2003 auto *II = dyn_cast<IntrinsicInst>(I);
2004 if (!II)
2005 return false;
2006
2007 switch (II->getIntrinsicID()) {
2008 case Intrinsic::fma:
2009 case Intrinsic::vp_fma:
2010 return Operand == 0 || Operand == 1;
2011 case Intrinsic::vp_shl:
2012 case Intrinsic::vp_lshr:
2013 case Intrinsic::vp_ashr:
2014 case Intrinsic::vp_udiv:
2015 case Intrinsic::vp_sdiv:
2016 case Intrinsic::vp_urem:
2017 case Intrinsic::vp_srem:
2018 case Intrinsic::ssub_sat:
2019 case Intrinsic::vp_ssub_sat:
2020 case Intrinsic::usub_sat:
2021 case Intrinsic::vp_usub_sat:
2022 return Operand == 1;
2023 // These intrinsics are commutative.
2024 case Intrinsic::vp_add:
2025 case Intrinsic::vp_mul:
2026 case Intrinsic::vp_and:
2027 case Intrinsic::vp_or:
2028 case Intrinsic::vp_xor:
2029 case Intrinsic::vp_fadd:
2030 case Intrinsic::vp_fmul:
2031 case Intrinsic::vp_icmp:
2032 case Intrinsic::vp_fcmp:
2033 case Intrinsic::smin:
2034 case Intrinsic::vp_smin:
2035 case Intrinsic::umin:
2036 case Intrinsic::vp_umin:
2037 case Intrinsic::smax:
2038 case Intrinsic::vp_smax:
2039 case Intrinsic::umax:
2040 case Intrinsic::vp_umax:
2041 case Intrinsic::sadd_sat:
2042 case Intrinsic::vp_sadd_sat:
2043 case Intrinsic::uadd_sat:
2044 case Intrinsic::vp_uadd_sat:
2045 // These intrinsics have 'vr' versions.
2046 case Intrinsic::vp_sub:
2047 case Intrinsic::vp_fsub:
2048 case Intrinsic::vp_fdiv:
2049 return Operand == 0 || Operand == 1;
2050 default:
2051 return false;
2052 }
2053}
2054
2055/// Check if sinking \p I's operands to I's basic block is profitable, because
2056/// the operands can be folded into a target instruction, e.g.
2057/// splats of scalars can fold into vector instructions.
2059 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2060 using namespace llvm::PatternMatch;
2061
2062 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2063 return false;
2064
2065 // Only sink splat operands if the target prefers it. Some targets require
2066 // S2V transfer buffers, and we can run out of them copying the same value
2067 // repeatedly.
2068 // FIXME: It could still be worth doing if it would improve vector register
2069 // pressure and prevent a vector spill.
2070 if (!Subtarget.sinkSplatOperands())
2071 return false;
2072
2073 for (auto OpIdx : enumerate(I->operands())) {
2074 if (!canSplatOperand(I, OpIdx.index()))
2075 continue;
2076
2077 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2078 // Make sure we are not already sinking this operand
2079 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2080 continue;
2081
2082 // We are looking for a splat that can be sunk.
2084 m_Undef(), m_ZeroMask())))
2085 continue;
2086
2087 // Don't sink i1 splats.
2088 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2089 continue;
2090
2091 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2092 // and vector registers.
2093 for (Use &U : Op->uses()) {
2094 Instruction *Insn = cast<Instruction>(U.getUser());
2095 if (!canSplatOperand(Insn, U.getOperandNo()))
2096 return false;
2097 }
2098
2099 Ops.push_back(&Op->getOperandUse(0));
2100 Ops.push_back(&OpIdx.value());
2101 }
2102 return true;
2103}
2104
2106 unsigned Opc = VecOp.getOpcode();
2107
2108 // Assume target opcodes can't be scalarized.
2109 // TODO - do we have any exceptions?
2110 if (Opc >= ISD::BUILTIN_OP_END)
2111 return false;
2112
2113 // If the vector op is not supported, try to convert to scalar.
2114 EVT VecVT = VecOp.getValueType();
2115 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2116 return true;
2117
2118 // If the vector op is supported, but the scalar op is not, the transform may
2119 // not be worthwhile.
2120 // Permit a vector binary operation to be converted to a scalar binary
2121 // operation that is custom-lowered with an illegal type.
2122 EVT ScalarVT = VecVT.getScalarType();
2123 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2124 isOperationCustom(Opc, ScalarVT);
2125}
2126
2128 const GlobalAddressSDNode *GA) const {
2129 // In order to maximise the opportunity for common subexpression elimination,
2130 // keep a separate ADD node for the global address offset instead of folding
2131 // it in the global address node. Later peephole optimisations may choose to
2132 // fold it back in when profitable.
2133 return false;
2134}
2135
2136 // Return one of the following:
2137// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2138// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2139// positive counterpart, which will be materialized from the first returned
2140 // element. The second returned element indicates that the materialized value
2141 // must then be negated with an FNEG.
2142// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2143std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2144 EVT VT) const {
2145 if (!Subtarget.hasStdExtZfa())
2146 return std::make_pair(-1, false);
2147
2148 bool IsSupportedVT = false;
2149 if (VT == MVT::f16) {
2150 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2151 } else if (VT == MVT::f32) {
2152 IsSupportedVT = true;
2153 } else if (VT == MVT::f64) {
2154 assert(Subtarget.hasStdExtD() && "Expect D extension");
2155 IsSupportedVT = true;
2156 }
2157
2158 if (!IsSupportedVT)
2159 return std::make_pair(-1, false);
2160
2162 if (Index < 0 && Imm.isNegative())
2163 // Try the combination of its positive counterpart + FNEG.
2164 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2165 else
2166 return std::make_pair(Index, false);
2167}
2168
2170 bool ForCodeSize) const {
2171 bool IsLegalVT = false;
2172 if (VT == MVT::f16)
2173 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2174 else if (VT == MVT::f32)
2175 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2176 else if (VT == MVT::f64)
2177 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2178 else if (VT == MVT::bf16)
2179 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2180
2181 if (!IsLegalVT)
2182 return false;
2183
2184 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2185 return true;
2186
2187 // Cannot create a 64 bit floating-point immediate value for rv32.
2188 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2189 // td can handle +0.0 or -0.0 already.
2190 // -0.0 can be created by fmv + fneg.
2191 return Imm.isZero();
2192 }
2193
2194 // Special case: fmv + fneg
2195 if (Imm.isNegZero())
2196 return true;
2197
2198 // Building an integer and then converting requires a fmv at the end of
2199 // the integer sequence.
2200 const int Cost =
2201 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2202 Subtarget);
2203 return Cost <= FPImmCost;
2204}
2205
2206// TODO: This is very conservative.
2207bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2208 unsigned Index) const {
2209 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2210 return false;
2211
2212 // Only support extracting a fixed from a fixed vector for now.
2213 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2214 return false;
2215
2216 EVT EltVT = ResVT.getVectorElementType();
2217 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2218
2219 // The smallest type we can slide is i8.
2220 // TODO: We can extract index 0 from a mask vector without a slide.
2221 if (EltVT == MVT::i1)
2222 return false;
2223
2224 unsigned ResElts = ResVT.getVectorNumElements();
2225 unsigned SrcElts = SrcVT.getVectorNumElements();
2226
2227 unsigned MinVLen = Subtarget.getRealMinVLen();
2228 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2229
2230 // If we're extracting only data from the first VLEN bits of the source
2231 // then we can always do this with an m1 vslidedown.vx. Restricting the
2232 // Index ensures we can use a vslidedown.vi.
2233 // TODO: We can generalize this when the exact VLEN is known.
2234 if (Index + ResElts <= MinVLMAX && Index < 31)
2235 return true;
2236
2237 // Conservatively only handle extracting half of a vector.
2238 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2239 // a cheap extract. However, this case is important in practice for
2240 // shuffled extracts of longer vectors. How should we resolve this?
2241 if ((ResElts * 2) != SrcElts)
2242 return false;
2243
2244 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2245 // cheap.
2246 if (Index >= 32)
2247 return false;
2248
2249 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2250 // the upper half of a vector until we have more test coverage.
2251 return Index == 0 || Index == ResElts;
2252}
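// Worked example (assuming VLEN >= 128): extracting v4i32 from v8i32 at
// index 0 stays within the first VLEN bits and is reported as cheap; index 4
// is exactly the upper half, which the final check also accepts; index 2
// fails both tests above and is reported as not cheap.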
2253
2254MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2255 CallingConv::ID CC,
2256 EVT VT) const {
2257 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2258 // We might still end up using a GPR but that will be decided based on ABI.
2259 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2260 !Subtarget.hasStdExtZfhminOrZhinxmin())
2261 return MVT::f32;
2262
2263 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2264
2265 return PartVT;
2266}
2267
2268unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2269 CallingConv::ID CC,
2270 EVT VT) const {
2271 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2272 // We might still end up using a GPR but that will be decided based on ABI.
2273 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2274 !Subtarget.hasStdExtZfhminOrZhinxmin())
2275 return 1;
2276
2277
2278 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2279
2280unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2281 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2282 unsigned &NumIntermediates, MVT &RegisterVT) const {
2283 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2284 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2285
2286 return NumRegs;
2287}
2288
2289// Changes the condition code and swaps operands if necessary, so the SetCC
2290// operation matches one of the comparisons supported directly by branches
2291// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2292// with 1/-1.
2293static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2294 ISD::CondCode &CC, SelectionDAG &DAG) {
2295 // If this is a single bit test that can't be handled by ANDI, shift the
2296 // bit to be tested to the MSB and perform a signed compare with 0.
2297 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2298 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2299 isa<ConstantSDNode>(LHS.getOperand(1))) {
2300 uint64_t Mask = LHS.getConstantOperandVal(1);
2301 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2302 unsigned ShAmt = 0;
2303 if (isPowerOf2_64(Mask)) {
2304 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2305 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2306 } else {
2307 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2308 }
2309
2310 LHS = LHS.getOperand(0);
2311 if (ShAmt != 0)
2312 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2313 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2314 return;
2315 }
2316 }
2317
2318 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2319 int64_t C = RHSC->getSExtValue();
2320 switch (CC) {
2321 default: break;
2322 case ISD::SETGT:
2323 // Convert X > -1 to X >= 0.
2324 if (C == -1) {
2325 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2326 CC = ISD::SETGE;
2327 return;
2328 }
2329 break;
2330 case ISD::SETLT:
2331 // Convert X < 1 to 0 >= X.
2332 if (C == 1) {
2333 RHS = LHS;
2334 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2335 CC = ISD::SETGE;
2336 return;
2337 }
2338 break;
2339 }
2340 }
2341
2342 switch (CC) {
2343 default:
2344 break;
2345 case ISD::SETGT:
2346 case ISD::SETLE:
2347 case ISD::SETUGT:
2348 case ISD::SETULE:
2349 CC = ISD::getSetCCSwappedOperands(CC);
2350 std::swap(LHS, RHS);
2351 break;
2352 }
2353}
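// For example, on RV64 the single-bit test (X & 0x800) == 0 has a mask that
// does not fit in ANDI's simm12, so it is rewritten above as (X << 52) >= 0:
// the tested bit is shifted into the sign position and compared against zero.
// Similarly, X > -1 becomes X >= 0 and X < 1 becomes 0 >= X, both of which
// map directly onto the BGE branch.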
2354
2355RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2356 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2357 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2358 if (VT.getVectorElementType() == MVT::i1)
2359 KnownSize *= 8;
2360
2361 switch (KnownSize) {
2362 default:
2363 llvm_unreachable("Invalid LMUL.");
2364 case 8:
2365 return RISCVII::VLMUL::LMUL_F8;
2366 case 16:
2367 return RISCVII::VLMUL::LMUL_F4;
2368 case 32:
2369 return RISCVII::VLMUL::LMUL_F2;
2370 case 64:
2371 return RISCVII::VLMUL::LMUL_1;
2372 case 128:
2373 return RISCVII::VLMUL::LMUL_2;
2374 case 256:
2375 return RISCVII::VLMUL::LMUL_4;
2376 case 512:
2377 return RISCVII::VLMUL::LMUL_8;
2378 }
2379}
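// For example, nxv1i8 has a known minimum size of 8 bits and maps to LMUL_F8,
// while nxv4i32 (128 bits) maps to LMUL_2; i1 vectors are first scaled by 8,
// so nxv8i1 also maps to LMUL_1.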
2380
2381unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2382 switch (LMul) {
2383 default:
2384 llvm_unreachable("Invalid LMUL.");
2385 case RISCVII::VLMUL::LMUL_F8:
2386 case RISCVII::VLMUL::LMUL_F4:
2387 case RISCVII::VLMUL::LMUL_F2:
2388 case RISCVII::VLMUL::LMUL_1:
2389 return RISCV::VRRegClassID;
2390 case RISCVII::VLMUL::LMUL_2:
2391 return RISCV::VRM2RegClassID;
2392 case RISCVII::VLMUL::LMUL_4:
2393 return RISCV::VRM4RegClassID;
2394 case RISCVII::VLMUL::LMUL_8:
2395 return RISCV::VRM8RegClassID;
2396 }
2397}
2398
2399unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2400 RISCVII::VLMUL LMUL = getLMUL(VT);
2401 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2402 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2403 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2404 LMUL == RISCVII::VLMUL::LMUL_1) {
2405 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2406 "Unexpected subreg numbering");
2407 return RISCV::sub_vrm1_0 + Index;
2408 }
2409 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2410 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2411 "Unexpected subreg numbering");
2412 return RISCV::sub_vrm2_0 + Index;
2413 }
2414 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2415 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2416 "Unexpected subreg numbering");
2417 return RISCV::sub_vrm4_0 + Index;
2418 }
2419 llvm_unreachable("Invalid vector type.");
2420}
2421
2422unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2423 if (VT.getVectorElementType() == MVT::i1)
2424 return RISCV::VRRegClassID;
2425 return getRegClassIDForLMUL(getLMUL(VT));
2426}
2427
2428// Attempt to decompose a subvector insert/extract between VecVT and
2429// SubVecVT via subregister indices. Returns the subregister index that
2430// can perform the subvector insert/extract with the given element index, as
2431// well as the index corresponding to any leftover subvectors that must be
2432// further inserted/extracted within the register class for SubVecVT.
2433std::pair<unsigned, unsigned>
2435 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2436 const RISCVRegisterInfo *TRI) {
2437 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2438 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2439 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2440 "Register classes not ordered");
2441 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2442 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2443 // Try to compose a subregister index that takes us from the incoming
2444 // LMUL>1 register class down to the outgoing one. At each step we half
2445 // the LMUL:
2446 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2447 // Note that this is not guaranteed to find a subregister index, such as
2448 // when we are extracting from one VR type to another.
2449 unsigned SubRegIdx = RISCV::NoSubRegister;
2450 for (const unsigned RCID :
2451 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2452 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2453 VecVT = VecVT.getHalfNumVectorElementsVT();
2454 bool IsHi =
2455 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2456 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2457 getSubregIndexByMVT(VecVT, IsHi));
2458 if (IsHi)
2459 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2460 }
2461 return {SubRegIdx, InsertExtractIdx};
2462}
2463
2464// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2465// stores for those types.
2466bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2467 return !Subtarget.useRVVForFixedLengthVectors() ||
2468 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2469}
2470
2471bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2472 if (!ScalarTy.isSimple())
2473 return false;
2474 switch (ScalarTy.getSimpleVT().SimpleTy) {
2475 case MVT::iPTR:
2476 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2477 case MVT::i8:
2478 case MVT::i16:
2479 case MVT::i32:
2480 return true;
2481 case MVT::i64:
2482 return Subtarget.hasVInstructionsI64();
2483 case MVT::f16:
2484 return Subtarget.hasVInstructionsF16();
2485 case MVT::f32:
2486 return Subtarget.hasVInstructionsF32();
2487 case MVT::f64:
2488 return Subtarget.hasVInstructionsF64();
2489 default:
2490 return false;
2491 }
2492}
2493
2494
2495unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2496 return NumRepeatedDivisors;
2497}
2498
2499static SDValue getVLOperand(SDValue Op) {
2500 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2501 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2502 "Unexpected opcode");
2503 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2504 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2505 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2506 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2507 if (!II)
2508 return SDValue();
2509 return Op.getOperand(II->VLOperand + 1 + HasChain);
2510}
2511
2512static bool useRVVForFixedLengthVectorVT(MVT VT,
2513 const RISCVSubtarget &Subtarget) {
2514 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2515 if (!Subtarget.useRVVForFixedLengthVectors())
2516 return false;
2517
2518 // We only support a set of vector types with a consistent maximum fixed size
2519 // across all supported vector element types to avoid legalization issues.
2520 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2521 // fixed-length vector type we support is 1024 bytes.
2522 if (VT.getFixedSizeInBits() > 1024 * 8)
2523 return false;
2524
2525 unsigned MinVLen = Subtarget.getRealMinVLen();
2526
2527 MVT EltVT = VT.getVectorElementType();
2528
2529 // Don't use RVV for vectors we cannot scalarize if required.
2530 switch (EltVT.SimpleTy) {
2531 // i1 is supported but has different rules.
2532 default:
2533 return false;
2534 case MVT::i1:
2535 // Masks can only use a single register.
2536 if (VT.getVectorNumElements() > MinVLen)
2537 return false;
2538 MinVLen /= 8;
2539 break;
2540 case MVT::i8:
2541 case MVT::i16:
2542 case MVT::i32:
2543 break;
2544 case MVT::i64:
2545 if (!Subtarget.hasVInstructionsI64())
2546 return false;
2547 break;
2548 case MVT::f16:
2549 if (!Subtarget.hasVInstructionsF16Minimal())
2550 return false;
2551 break;
2552 case MVT::bf16:
2553 if (!Subtarget.hasVInstructionsBF16Minimal())
2554 return false;
2555 break;
2556 case MVT::f32:
2557 if (!Subtarget.hasVInstructionsF32())
2558 return false;
2559 break;
2560 case MVT::f64:
2561 if (!Subtarget.hasVInstructionsF64())
2562 return false;
2563 break;
2564 }
2565
2566 // Reject elements larger than ELEN.
2567 if (EltVT.getSizeInBits() > Subtarget.getELen())
2568 return false;
2569
2570 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2571 // Don't use RVV for types that don't fit.
2572 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2573 return false;
2574
2575 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2576 // the base fixed length RVV support in place.
2577 if (!VT.isPow2VectorType())
2578 return false;
2579
2580 return true;
2581}
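// Rough examples (assuming the common VLEN lower bound of 128 and the default
// maximum LMUL of 8): v4i32 fits easily and is allowed; v64i32 would need
// LMUL 16 and is rejected; v4f16 is rejected unless Zvfh/Zvfhmin provides f16
// vector support; v3i32 is rejected because it is not a power-of-two type.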
2582
2583bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2584 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2585}
2586
2587// Return the largest legal scalable vector type that matches VT's element type.
2588static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2589 const RISCVSubtarget &Subtarget) {
2590 // This may be called before legal types are setup.
2591 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2592 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2593 "Expected legal fixed length vector!");
2594
2595 unsigned MinVLen = Subtarget.getRealMinVLen();
2596 unsigned MaxELen = Subtarget.getELen();
2597
2598 MVT EltVT = VT.getVectorElementType();
2599 switch (EltVT.SimpleTy) {
2600 default:
2601 llvm_unreachable("unexpected element type for RVV container");
2602 case MVT::i1:
2603 case MVT::i8:
2604 case MVT::i16:
2605 case MVT::i32:
2606 case MVT::i64:
2607 case MVT::bf16:
2608 case MVT::f16:
2609 case MVT::f32:
2610 case MVT::f64: {
2611 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2612 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2613 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2614 unsigned NumElts =
2615 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2616 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2617 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2618 return MVT::getScalableVectorVT(EltVT, NumElts);
2619 }
2620 }
2621}
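// Example (assuming a minimum VLEN of 128 and ELEN of 64): v4i32 occupies a
// full VLEN, so NumElts = (4 * 64) / 128 = 2 and the container is nxv2i32
// (effectively LMUL=1); v16i32 gets nxv8i32 (LMUL=4). For very narrow types
// the std::max clamp keeps the fractional LMUL at or above 8/ELEN.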
2622
2623static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2624 const RISCVSubtarget &Subtarget) {
2625 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2626 Subtarget);
2627}
2628
2629MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2630 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2631}
2632
2633// Grow V to consume an entire RVV register.
2634static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2635 const RISCVSubtarget &Subtarget) {
2636 assert(VT.isScalableVector() &&
2637 "Expected to convert into a scalable vector!");
2638 assert(V.getValueType().isFixedLengthVector() &&
2639 "Expected a fixed length vector operand!");
2640 SDLoc DL(V);
2641 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2642 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2643}
2644
2645// Shrink V so it's just big enough to maintain a VT's worth of data.
2646static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2647 const RISCVSubtarget &Subtarget) {
2648 assert(VT.isFixedLengthVector() &&
2649 "Expected to convert into a fixed length vector!");
2650 assert(V.getValueType().isScalableVector() &&
2651 "Expected a scalable vector operand!");
2652 SDLoc DL(V);
2653 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2654 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2655}
2656
2657/// Return the mask type suitable for masking the provided
2658/// vector type. This is simply an i1 element type vector of the same
2659/// (possibly scalable) length.
2660static MVT getMaskTypeFor(MVT VecVT) {
2661 assert(VecVT.isVector());
2662 ElementCount EC = VecVT.getVectorElementCount();
2663 return MVT::getVectorVT(MVT::i1, EC);
2664}
2665
2666/// Creates an all ones mask suitable for masking a vector of type VecTy with
2667/// vector length VL.
2668static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2669 SelectionDAG &DAG) {
2670 MVT MaskVT = getMaskTypeFor(VecVT);
2671 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2672}
2673
2674static std::pair<SDValue, SDValue>
2675getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2676 const RISCVSubtarget &Subtarget) {
2677 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2678 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2679 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2680 return {Mask, VL};
2681}
2682
2683static std::pair<SDValue, SDValue>
2684getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2685 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2686 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2687 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2688 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2689 return {Mask, VL};
2690}
2691
2692// Gets the two common "VL" operands: an all-ones mask and the vector length.
2693// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2694// fixed-length, ContainerVT is the scalable container type it is lowered
2695// into; if VecVT is scalable, ContainerVT should be the same as VecVT.
2696static std::pair<SDValue, SDValue>
2697getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2698 const RISCVSubtarget &Subtarget) {
2699 if (VecVT.isFixedLengthVector())
2700 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2701 Subtarget);
2702 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2703 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2704}
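// For example, lowering a fixed-length v4i32 in an nxv2i32 container yields
// VL = constant 4 and an all-ones nxv2i1 mask from VMSET_VL; for a truly
// scalable type the VL operand is the X0 sentinel, which the backend treats
// as VLMAX.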
2705
2706SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2707 SelectionDAG &DAG) const {
2708 assert(VecVT.isScalableVector() && "Expected scalable vector");
2709 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2710 VecVT.getVectorElementCount());
2711}
2712
2713std::pair<unsigned, unsigned>
2714RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2715 const RISCVSubtarget &Subtarget) {
2716 assert(VecVT.isScalableVector() && "Expected scalable vector");
2717
2718 unsigned EltSize = VecVT.getScalarSizeInBits();
2719 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2720
2721 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2722 unsigned MaxVLMAX =
2723 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2724
2725 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2726 unsigned MinVLMAX =
2727 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2728
2729 return std::make_pair(MinVLMAX, MaxVLMAX);
2730}
2731
2732// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2733// of either is (currently) supported. This can get us into an infinite loop
2734// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2735// as a ..., etc.
2736// Until either (or both) of these can reliably lower any node, reporting that
2737// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2738// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2739// which is not desirable.
2740bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2741 EVT VT, unsigned DefinedValues) const {
2742 return false;
2743}
2744
2745InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2746 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2747 // implementation-defined.
2748 if (!VT.isVector())
2749 return InstructionCost::getInvalid();
2750 unsigned DLenFactor = Subtarget.getDLenFactor();
2751 unsigned Cost;
2752 if (VT.isScalableVector()) {
2753 unsigned LMul;
2754 bool Fractional;
2755 std::tie(LMul, Fractional) =
2756 RISCVVType::decodeVLMUL(getLMUL(VT));
2757 if (Fractional)
2758 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2759 else
2760 Cost = (LMul * DLenFactor);
2761 } else {
2762 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2763 }
2764 return Cost;
2765}
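// Example costs under this model: with DLEN == VLEN (DLenFactor 1), an LMUL_4
// type costs 4 and any fractional LMUL costs 1; with DLEN == VLEN/2
// (DLenFactor 2), an LMUL_1 type costs 2. Fixed-length types are costed by
// how many DLEN-sized chunks they span.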
2766
2767
2768/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2769/// is generally quadratic in the number of vreg implied by LMUL. Note that
2770/// operands (index and possibly mask) are handled separately.
2771InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2772 return getLMULCost(VT) * getLMULCost(VT);
2773}
2774
2775/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2776/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2777/// or may track the vrgather.vv cost. It is implementation-dependent.
2779 return getLMULCost(VT);
2780}
2781
2782/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2783/// for the type VT. (This does not cover the vslide1up or vslide1down
2784/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2785/// or may track the vrgather.vv cost. It is implementation-dependent.
2787 return getLMULCost(VT);
2788}
2789
2790/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2791/// for the type VT. (This does not cover the vslide1up or vslide1down
2792/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2793/// or may track the vrgather.vv cost. It is implementation-dependent.
2795 return getLMULCost(VT);
2796}
2797
2798static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2799 const RISCVSubtarget &Subtarget) {
2800 // RISC-V FP-to-int conversions saturate to the destination register size, but
2801 // don't produce 0 for nan. We can use a conversion instruction and fix the
2802 // nan case with a compare and a select.
2803 SDValue Src = Op.getOperand(0);
2804
2805 MVT DstVT = Op.getSimpleValueType();
2806 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2807
2808 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2809
2810 if (!DstVT.isVector()) {
2811 // For bf16 or for f16 in absense of Zfh, promote to f32, then saturate
2812 // the result.
2813 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2814 Src.getValueType() == MVT::bf16) {
2815 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2816 }
2817
2818 unsigned Opc;
2819 if (SatVT == DstVT)
2820 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2821 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2822 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2823 else
2824 return SDValue();
2825 // FIXME: Support other SatVTs by clamping before or after the conversion.
2826
2827 SDLoc DL(Op);
2828 SDValue FpToInt = DAG.getNode(
2829 Opc, DL, DstVT, Src,
2830 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2831
2832 if (Opc == RISCVISD::FCVT_WU_RV64)
2833 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2834
2835 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2836 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2837 ISD::CondCode::SETUO);
2838 }
2839
2840 // Vectors.
2841
2842 MVT DstEltVT = DstVT.getVectorElementType();
2843 MVT SrcVT = Src.getSimpleValueType();
2844 MVT SrcEltVT = SrcVT.getVectorElementType();
2845 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2846 unsigned DstEltSize = DstEltVT.getSizeInBits();
2847
2848 // Only handle saturating to the destination type.
2849 if (SatVT != DstEltVT)
2850 return SDValue();
2851
2852 MVT DstContainerVT = DstVT;
2853 MVT SrcContainerVT = SrcVT;
2854 if (DstVT.isFixedLengthVector()) {
2855 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2856 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2857 assert(DstContainerVT.getVectorElementCount() ==
2858 SrcContainerVT.getVectorElementCount() &&
2859 "Expected same element count");
2860 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2861 }
2862
2863 SDLoc DL(Op);
2864
2865 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2866
2867 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2868 {Src, Src, DAG.getCondCode(ISD::SETNE),
2869 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2870
2871 // Need to widen by more than 1 step, promote the FP type, then do a widening
2872 // convert.
2873 if (DstEltSize > (2 * SrcEltSize)) {
2874 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2875 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2876 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2877 }
2878
2879 MVT CvtContainerVT = DstContainerVT;
2880 MVT CvtEltVT = DstEltVT;
2881 if (SrcEltSize > (2 * DstEltSize)) {
2882 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2883 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2884 }
2885
2886 unsigned RVVOpc =
2887 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2888 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2889
2890 while (CvtContainerVT != DstContainerVT) {
2891 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2892 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2893 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2894 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2895 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
2896 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2897 }
2898
2899 SDValue SplatZero = DAG.getNode(
2900 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2901 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2902 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2903 Res, DAG.getUNDEF(DstContainerVT), VL);
2904
2905 if (DstVT.isFixedLengthVector())
2906 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2907
2908 return Res;
2909}
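// For the scalar signed f32 -> i32 case, the selected code is roughly the
// following (register choices are up to the allocator and the exact sequence
// may differ):
//   fcvt.w.s a0, fa0, rtz   ; saturating convert, RTZ rounding
//   feq.s    a1, fa0, fa0   ; a1 == 0 iff the input was NaN
//   neg      a1, a1
//   and      a0, a0, a1     ; force the NaN case to 0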
2910
2911static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2912 switch (Opc) {
2913 case ISD::FROUNDEVEN:
2914 case ISD::STRICT_FROUNDEVEN:
2915 case ISD::VP_FROUNDEVEN:
2916 return RISCVFPRndMode::RNE;
2917 case ISD::FTRUNC:
2918 case ISD::STRICT_FTRUNC:
2919 case ISD::VP_FROUNDTOZERO:
2920 return RISCVFPRndMode::RTZ;
2921 case ISD::FFLOOR:
2922 case ISD::STRICT_FFLOOR:
2923 case ISD::VP_FFLOOR:
2924 return RISCVFPRndMode::RDN;
2925 case ISD::FCEIL:
2926 case ISD::STRICT_FCEIL:
2927 case ISD::VP_FCEIL:
2928 return RISCVFPRndMode::RUP;
2929 case ISD::FROUND:
2930 case ISD::STRICT_FROUND:
2931 case ISD::VP_FROUND:
2932 return RISCVFPRndMode::RMM;
2933 case ISD::FRINT:
2934 return RISCVFPRndMode::DYN;
2935 }
2936
2937 return RISCVFPRndMode::Invalid;
2938}
2939
2940// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2941// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2942// the integer domain and back. Taking care to avoid converting values that are
2943// nan or already correct.
2944static SDValue
2945lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2946 const RISCVSubtarget &Subtarget) {
2947 MVT VT = Op.getSimpleValueType();
2948 assert(VT.isVector() && "Unexpected type");
2949
2950 SDLoc DL(Op);
2951
2952 SDValue Src = Op.getOperand(0);
2953
2954 MVT ContainerVT = VT;
2955 if (VT.isFixedLengthVector()) {
2956 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2957 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2958 }
2959
2960 SDValue Mask, VL;
2961 if (Op->isVPOpcode()) {
2962 Mask = Op.getOperand(1);
2963 if (VT.isFixedLengthVector())
2964 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2965 Subtarget);
2966 VL = Op.getOperand(2);
2967 } else {
2968 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2969 }
2970
2971 // Freeze the source since we are increasing the number of uses.
2972 Src = DAG.getFreeze(Src);
2973
2974 // We do the conversion on the absolute value and fix the sign at the end.
2975 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2976
2977 // Determine the largest integer that can be represented exactly. This and
2978 // values larger than it don't have any fractional bits so don't need to
2979 // be converted.
2980 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2981 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2982 APFloat MaxVal = APFloat(FltSem);
2983 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2984 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2985 SDValue MaxValNode =
2986 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2987 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2988 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2989
2990 // If abs(Src) was larger than MaxVal or nan, keep it.
2991 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2992 Mask =
2993 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2994 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2995 Mask, Mask, VL});
2996
2997 // Truncate to integer and convert back to FP.
2998 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2999 MVT XLenVT = Subtarget.getXLenVT();
3000 SDValue Truncated;
3001
3002 switch (Op.getOpcode()) {
3003 default:
3004 llvm_unreachable("Unexpected opcode");
3005 case ISD::FCEIL:
3006 case ISD::VP_FCEIL:
3007 case ISD::FFLOOR:
3008 case ISD::VP_FFLOOR:
3009 case ISD::FROUND:
3010 case ISD::FROUNDEVEN:
3011 case ISD::VP_FROUND:
3012 case ISD::VP_FROUNDEVEN:
3013 case ISD::VP_FROUNDTOZERO: {
3014 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3015 assert(FRM != RISCVFPRndMode::Invalid);
3016 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3017 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3018 break;
3019 }
3020 case ISD::FTRUNC:
3021 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3022 Mask, VL);
3023 break;
3024 case ISD::FRINT:
3025 case ISD::VP_FRINT:
3026 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3027 break;
3028 case ISD::FNEARBYINT:
3029 case ISD::VP_FNEARBYINT:
3030 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3031 Mask, VL);
3032 break;
3033 }
3034
3035 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3036 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3037 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3038 Mask, VL);
3039
3040 // Restore the original sign so that -0.0 is preserved.
3041 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3042 Src, Src, Mask, VL);
3043
3044 if (!VT.isFixedLengthVector())
3045 return Truncated;
3046
3047 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3048}
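// Worked example for f32 elements: the precision is 24 bits, so MaxVal is
// 2^23 and only lanes with |x| < 2^23 (and not NaN) are converted. For FFLOOR
// on -1.5 the RDN convert gives -2 and converting back gives -2.0; for FTRUNC
// on -0.3 the RTZ convert gives 0 and the trailing copysign restores the sign,
// so the result is -0.0, matching the scalar semantics.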
3049
3050// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3051// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3052// to qNaN, then converting the new source to integer and back to FP.
3053static SDValue
3054lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3055 const RISCVSubtarget &Subtarget) {
3056 SDLoc DL(Op);
3057 MVT VT = Op.getSimpleValueType();
3058 SDValue Chain = Op.getOperand(0);
3059 SDValue Src = Op.getOperand(1);
3060
3061 MVT ContainerVT = VT;
3062 if (VT.isFixedLengthVector()) {
3063 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3064 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3065 }
3066
3067 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3068
3069 // Freeze the source since we are increasing the number of uses.
3070 Src = DAG.getFreeze(Src);
3071
3072 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3073 MVT MaskVT = Mask.getSimpleValueType();
3074 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3075 DAG.getVTList(MaskVT, MVT::Other),
3076 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3077 DAG.getUNDEF(MaskVT), Mask, VL});
3078 Chain = Unorder.getValue(1);
3079 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3080 DAG.getVTList(ContainerVT, MVT::Other),
3081 {Chain, Src, Src, Src, Unorder, VL});
3082 Chain = Src.getValue(1);
3083
3084 // We do the conversion on the absolute value and fix the sign at the end.
3085 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3086
3087 // Determine the largest integer that can be represented exactly. This and
3088 // values larger than it don't have any fractional bits so don't need to
3089 // be converted.
3090 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3091 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3092 APFloat MaxVal = APFloat(FltSem);
3093 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3094 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3095 SDValue MaxValNode =
3096 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3097 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3098 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3099
3100 // If abs(Src) was larger than MaxVal or nan, keep it.
3101 Mask = DAG.getNode(
3102 RISCVISD::SETCC_VL, DL, MaskVT,
3103 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3104
3105 // Truncate to integer and convert back to FP.
3106 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3107 MVT XLenVT = Subtarget.getXLenVT();
3108 SDValue Truncated;
3109
3110 switch (Op.getOpcode()) {
3111 default:
3112 llvm_unreachable("Unexpected opcode");
3113 case ISD::STRICT_FCEIL:
3114 case ISD::STRICT_FFLOOR:
3115 case ISD::STRICT_FROUND:
3116 case ISD::STRICT_FROUNDEVEN: {
3117 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3118 assert(FRM != RISCVFPRndMode::Invalid);
3119 Truncated = DAG.getNode(
3120 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3121 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3122 break;
3123 }
3124 case ISD::STRICT_FTRUNC:
3125 Truncated =
3126 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3127 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3128 break;
3129 case ISD::STRICT_FNEARBYINT:
3130 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3131 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3132 Mask, VL);
3133 break;
3134 }
3135 Chain = Truncated.getValue(1);
3136
3137 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3138 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3139 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3140 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3141 Truncated, Mask, VL);
3142 Chain = Truncated.getValue(1);
3143 }
3144
3145 // Restore the original sign so that -0.0 is preserved.
3146 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3147 Src, Src, Mask, VL);
3148
3149 if (VT.isFixedLengthVector())
3150 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3151 return DAG.getMergeValues({Truncated, Chain}, DL);
3152}
3153
3154static SDValue
3155lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3156 const RISCVSubtarget &Subtarget) {
3157 MVT VT = Op.getSimpleValueType();
3158 if (VT.isVector())
3159 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3160
3161 if (DAG.shouldOptForSize())
3162 return SDValue();
3163
3164 SDLoc DL(Op);
3165 SDValue Src = Op.getOperand(0);
3166
3167 // Create an integer the size of the mantissa with the MSB set. This and all
3168 // values larger than it don't have any fractional bits so don't need to be
3169 // converted.
3170 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3171 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3172 APFloat MaxVal = APFloat(FltSem);
3173 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3174 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3175 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3176
3177 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3178 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3179 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3180}
3181
3182// Expand vector LRINT and LLRINT by converting to the integer domain.
3183static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3184 const RISCVSubtarget &Subtarget) {
3185 MVT VT = Op.getSimpleValueType();
3186 assert(VT.isVector() && "Unexpected type");
3187
3188 SDLoc DL(Op);
3189 SDValue Src = Op.getOperand(0);
3190 MVT ContainerVT = VT;
3191
3192 if (VT.isFixedLengthVector()) {
3193 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3194 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3195 }
3196
3197 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3198 SDValue Truncated =
3199 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3200
3201 if (!VT.isFixedLengthVector())
3202 return Truncated;
3203
3204 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3205}
3206
3207static SDValue
3208getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3209 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3210 SDValue Offset, SDValue Mask, SDValue VL,
3211 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3212 if (Passthru.isUndef())
3213 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3214 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3215 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3216 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3217}
3218
3219static SDValue
3220getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3221 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3222 SDValue VL,
3223 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3224 if (Passthru.isUndef())
3225 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3226 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3227 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3228 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3229}
3230
3231static MVT getLMUL1VT(MVT VT) {
3232 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3233 "Unexpected vector MVT");
3234 return MVT::getScalableVectorVT(
3235 VT.getVectorElementType(),
3236 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3237}
3238
3239struct VIDSequence {
3240 int64_t StepNumerator;
3241 unsigned StepDenominator;
3242 int64_t Addend;
3243};
3244
3245static std::optional<APInt> getExactInteger(const APFloat &APF,
3246 unsigned BitWidth) {
3247 // We will use a SINT_TO_FP to materialize this constant so we should use a
3248 // signed APSInt here.
3249 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3250 // We use an arbitrary rounding mode here. If a floating-point is an exact
3251 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3252 // the rounding mode changes the output value, then it is not an exact
3253 // integer.
3254 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3255 bool IsExact;
3256 // If it is out of signed integer range, it will return an invalid operation.
3257 // If it is not an exact integer, IsExact is false.
3258 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3259 APFloatBase::opInvalidOp) ||
3260 !IsExact)
3261 return std::nullopt;
3262 return ValInt.extractBits(BitWidth, 0);
3263}
3264
3265// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3266// to the (non-zero) step S and start value X. This can be then lowered as the
3267// RVV sequence (VID * S) + X, for example.
3268// The step S is represented as an integer numerator divided by a positive
3269// denominator. Note that the implementation currently only identifies
3270// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3271// cannot detect 2/3, for example.
3272// Note that this method will also match potentially unappealing index
3273// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3274// determine whether this is worth generating code for.
3275//
3276// EltSizeInBits is the size of the type that the sequence will be calculated
3277// in, i.e. SEW for build_vectors or XLEN for address calculations.
3278static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3279 unsigned EltSizeInBits) {
3280 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3281 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3282 return std::nullopt;
3283 bool IsInteger = Op.getValueType().isInteger();
3284
3285 std::optional<unsigned> SeqStepDenom;
3286 std::optional<APInt> SeqStepNum;
3287 std::optional<APInt> SeqAddend;
3288 std::optional<std::pair<APInt, unsigned>> PrevElt;
3289 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3290
3291 // First extract the ops into a list of constant integer values. This may not
3292 // be possible for floats if they're not all representable as integers.
3293 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3294 const unsigned OpSize = Op.getScalarValueSizeInBits();
3295 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3296 if (Elt.isUndef()) {
3297 Elts[Idx] = std::nullopt;
3298 continue;
3299 }
3300 if (IsInteger) {
3301 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3302 } else {
3303 auto ExactInteger =
3304 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3305 if (!ExactInteger)
3306 return std::nullopt;
3307 Elts[Idx] = *ExactInteger;
3308 }
3309 }
3310
3311 for (auto [Idx, Elt] : enumerate(Elts)) {
3312 // Assume undef elements match the sequence; we just have to be careful
3313 // when interpolating across them.
3314 if (!Elt)
3315 continue;
3316
3317 if (PrevElt) {
3318 // Calculate the step since the last non-undef element, and ensure
3319 // it's consistent across the entire sequence.
3320 unsigned IdxDiff = Idx - PrevElt->second;
3321 APInt ValDiff = *Elt - PrevElt->first;
3322
3323 // A zero value difference means that we're somewhere in the middle
3324 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3325 // step change before evaluating the sequence.
3326 if (ValDiff == 0)
3327 continue;
3328
3329 int64_t Remainder = ValDiff.srem(IdxDiff);
3330 // Normalize the step if it's greater than 1.
3331 if (Remainder != ValDiff.getSExtValue()) {
3332 // The difference must cleanly divide the element span.
3333 if (Remainder != 0)
3334 return std::nullopt;
3335 ValDiff = ValDiff.sdiv(IdxDiff);
3336 IdxDiff = 1;
3337 }
3338
3339 if (!SeqStepNum)
3340 SeqStepNum = ValDiff;
3341 else if (ValDiff != SeqStepNum)
3342 return std::nullopt;
3343
3344 if (!SeqStepDenom)
3345 SeqStepDenom = IdxDiff;
3346 else if (IdxDiff != *SeqStepDenom)
3347 return std::nullopt;
3348 }
3349
3350 // Record this non-undef element for later.
3351 if (!PrevElt || PrevElt->first != *Elt)
3352 PrevElt = std::make_pair(*Elt, Idx);
3353 }
3354
3355 // We need to have logged a step for this to count as a legal index sequence.
3356 if (!SeqStepNum || !SeqStepDenom)
3357 return std::nullopt;
3358
3359 // Loop back through the sequence and validate elements we might have skipped
3360 // while waiting for a valid step. While doing this, log any sequence addend.
3361 for (auto [Idx, Elt] : enumerate(Elts)) {
3362 if (!Elt)
3363 continue;
3364 APInt ExpectedVal =
3365 (APInt(EltSizeInBits, Idx) * *SeqStepNum).sdiv(*SeqStepDenom);
3366
3367 APInt Addend = *Elt - ExpectedVal;
3368 if (!SeqAddend)
3369 SeqAddend = Addend;
3370 else if (Addend != SeqAddend)
3371 return std::nullopt;
3372 }
3373
3374 assert(SeqAddend && "Must have an addend if we have a step");
3375
3376 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3377 SeqAddend->getSExtValue()};
3378}
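// A couple of worked examples: <1, 3, 5, 7> is recognised with
// StepNumerator = 2, StepDenominator = 1 and Addend = 1, while <0, 0, 1, 1>
// is recognised with StepNumerator = 1, StepDenominator = 2 and Addend = 0
// (each expected element is (Idx * 1) / 2). A vector such as <0, 1, 3> is
// rejected because no single step fits all of its elements.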
3379
3380// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3381// and lower it as a VRGATHER_VX_VL from the source vector.
3382static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3383 SelectionDAG &DAG,
3384 const RISCVSubtarget &Subtarget) {
3385 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3386 return SDValue();
3387 SDValue Vec = SplatVal.getOperand(0);
3388 // Only perform this optimization on vectors of the same size for simplicity.
3389 // Don't perform this optimization for i1 vectors.
3390 // FIXME: Support i1 vectors, maybe by promoting to i8?
3391 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3392 return SDValue();
3393 SDValue Idx = SplatVal.getOperand(1);
3394 // The index must be a legal type.
3395 if (Idx.getValueType() != Subtarget.getXLenVT())
3396 return SDValue();
3397
3398 MVT ContainerVT = VT;
3399 if (VT.isFixedLengthVector()) {
3400 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3401 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3402 }
3403
3404 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3405
3406 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3407 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3408
3409 if (!VT.isFixedLengthVector())
3410 return Gather;
3411
3412 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3413}
3414
3415
3416/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3417/// which constitute a large proportion of the elements. In such cases we can
3418/// splat a vector with the dominant element and make up the shortfall with
3419/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3420/// Note that this includes vectors of 2 elements by association. The
3421/// upper-most element is the "dominant" one, allowing us to use a splat to
3422/// "insert" the upper element, and an insert of the lower element at position
3423/// 0, which improves codegen.
3425 const RISCVSubtarget &Subtarget) {
3426 MVT VT = Op.getSimpleValueType();
3427 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3428
3429 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3430
3431 SDLoc DL(Op);
3432 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3433
3434 MVT XLenVT = Subtarget.getXLenVT();
3435 unsigned NumElts = Op.getNumOperands();
3436
3437 SDValue DominantValue;
3438 unsigned MostCommonCount = 0;
3439 DenseMap<SDValue, unsigned> ValueCounts;
3440 unsigned NumUndefElts =
3441 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3442
3443 // Track the number of scalar loads we know we'd be inserting, estimated as
3444 // any non-zero floating-point constant. Other kinds of elements are either
3445 // already in registers or are materialized on demand. The threshold at which
3446 // a vector load is more desirable than several scalar materialization and
3447 // vector-insertion instructions is not known.
3448 unsigned NumScalarLoads = 0;
3449
3450 for (SDValue V : Op->op_values()) {
3451 if (V.isUndef())
3452 continue;
3453
3454 ValueCounts.insert(std::make_pair(V, 0));
3455 unsigned &Count = ValueCounts[V];
3456 if (0 == Count)
3457 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3458 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3459
3460 // Is this value dominant? In case of a tie, prefer the highest element as
3461 // it's cheaper to insert near the beginning of a vector than it is at the
3462 // end.
3463 if (++Count >= MostCommonCount) {
3464 DominantValue = V;
3465 MostCommonCount = Count;
3466 }
3467 }
3468
3469 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3470 unsigned NumDefElts = NumElts - NumUndefElts;
3471 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3472
3473 // Don't perform this optimization when optimizing for size, since
3474 // materializing elements and inserting them tends to cause code bloat.
3475 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3476 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3477 ((MostCommonCount > DominantValueCountThreshold) ||
3478 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3479 // Start by splatting the most common element.
3480 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3481
3482 DenseSet<SDValue> Processed{DominantValue};
3483
3484 // We can handle an insert into the last element (of a splat) via
3485 // v(f)slide1down. This is slightly better than the vslideup insert
3486 // lowering as it avoids the need for a vector group temporary. It
3487 // is also better than using vmerge.vx as it avoids the need to
3488 // materialize the mask in a vector register.
3489 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3490 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3491 LastOp != DominantValue) {
3492 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3493 auto OpCode =
3495 if (!VT.isFloatingPoint())
3496 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3497 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3498 LastOp, Mask, VL);
3499 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3500 Processed.insert(LastOp);
3501 }
3502
3503 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3504 for (const auto &OpIdx : enumerate(Op->ops())) {
3505 const SDValue &V = OpIdx.value();
3506 if (V.isUndef() || !Processed.insert(V).second)
3507 continue;
3508 if (ValueCounts[V] == 1) {
3509 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3510 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3511 } else {
3512 // Blend in all instances of this value using a VSELECT, using a
3513 // mask where each bit signals whether that element is the one
3514 // we're after.
3515 SmallVector<SDValue> Ops;
3516 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3517 return DAG.getConstant(V == V1, DL, XLenVT);
3518 });
3519 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3520 DAG.getBuildVector(SelMaskTy, DL, Ops),
3521 DAG.getSplatBuildVector(VT, DL, V), Vec);
3522 }
3523 }
3524
3525 return Vec;
3526 }
3527
3528 return SDValue();
3529}
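// For instance, a v4f32 build_vector {a, b, a, a} is lowered here as a splat
// of a followed by a single INSERT_VECTOR_ELT for b (or a v(f)slide1down when
// the odd value sits in the last lane), instead of four independent inserts.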
3530
3532 const RISCVSubtarget &Subtarget) {
3533 MVT VT = Op.getSimpleValueType();
3534 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3535
3536 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3537
3538 SDLoc DL(Op);
3539 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3540
3541 MVT XLenVT = Subtarget.getXLenVT();
3542 unsigned NumElts = Op.getNumOperands();
3543
3544 if (VT.getVectorElementType() == MVT::i1) {
3545 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3546 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3547 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3548 }
3549
3550 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3551 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3552 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3553 }
3554
3555 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3556 // scalar integer chunks whose bit-width depends on the number of mask
3557 // bits and XLEN.
3558 // First, determine the most appropriate scalar integer type to use. This
3559 // is at most XLenVT, but may be shrunk to a smaller vector element type
3560 // according to the size of the final vector - use i8 chunks rather than
3561 // XLenVT if we're producing a v8i1. This results in more consistent
3562 // codegen across RV32 and RV64.
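    // For example, a constant v8i1 mask <1,0,1,1,0,0,1,0> is packed LSB-first
    // into the i8 value 0b01001101 (0x4D), built as a v1i8 vector, and then
    // bitcast back to the mask type below.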
3563 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3564 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3565 // If we have to use more than one INSERT_VECTOR_ELT then this
3566 // optimization is likely to increase code size; avoid performing it in
3567 // such a case. We can use a load from a constant pool in this case.
3568 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3569 return SDValue();
3570 // Now we can create our integer vector type. Note that it may be larger
3571 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3572 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3573 MVT IntegerViaVecVT =
3574 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3575 IntegerViaVecElts);
3576
3577 uint64_t Bits = 0;
3578 unsigned BitPos = 0, IntegerEltIdx = 0;
3579 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3580
3581 for (unsigned I = 0; I < NumElts;) {
3582 SDValue V = Op.getOperand(I);
3583 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3584 Bits |= ((uint64_t)BitValue << BitPos);
3585 ++BitPos;
3586 ++I;
3587
3588 // Once we accumulate enough bits to fill our scalar type or process the
3589 // last element, insert into our vector and clear our accumulated data.
3590 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3591 if (NumViaIntegerBits <= 32)
3592 Bits = SignExtend64<32>(Bits);
3593 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3594 Elts[IntegerEltIdx] = Elt;
3595 Bits = 0;
3596 BitPos = 0;
3597 IntegerEltIdx++;
3598 }
3599 }
3600
3601 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3602
3603 if (NumElts < NumViaIntegerBits) {
3604 // If we're producing a smaller vector than our minimum legal integer
3605 // type, bitcast to the equivalent (known-legal) mask type, and extract
3606 // our final mask.
3607 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3608 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3609 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3610 DAG.getConstant(0, DL, XLenVT));
3611 } else {
3612 // Else we must have produced an integer type with the same size as the
3613 // mask type; bitcast for the final result.
3614 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3615 Vec = DAG.getBitcast(VT, Vec);
3616 }
3617
3618 return Vec;
3619 }
3620
3621 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3622 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3623 : RISCVISD::VMV_V_X_VL;
3624 if (!VT.isFloatingPoint())
3625 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3626 Splat =
3627 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3628 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3629 }
3630
3631 // Try and match index sequences, which we can lower to the vid instruction
3632 // with optional modifications. An all-undef vector is matched by
3633 // getSplatValue, above.
3634 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3635 int64_t StepNumerator = SimpleVID->StepNumerator;
3636 unsigned StepDenominator = SimpleVID->StepDenominator;
3637 int64_t Addend = SimpleVID->Addend;
3638
3639 assert(StepNumerator != 0 && "Invalid step");
3640 bool Negate = false;
3641 int64_t SplatStepVal = StepNumerator;
3642 unsigned StepOpcode = ISD::MUL;
3643 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3644 // anyway as the shift of 63 won't fit in uimm5.
3645 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3646 isPowerOf2_64(std::abs(StepNumerator))) {
3647 Negate = StepNumerator < 0;
3648 StepOpcode = ISD::SHL;
3649 SplatStepVal = Log2_64(std::abs(StepNumerator));
3650 }
3651
3652 // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
3653 // threshold since it's the immediate value many RVV instructions accept.
3654 // There is no vmul.vi instruction so ensure the multiply constant can fit
3655 // in a single addi instruction.
3656 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3657 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3658 isPowerOf2_32(StepDenominator) &&
3659 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3660 MVT VIDVT =
3661 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3662 MVT VIDContainerVT =
3663 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3664 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3665 // Convert right out of the scalable type so we can use standard ISD
3666 // nodes for the rest of the computation. If we used scalable types with
3667 // these, we'd lose the fixed-length vector info and generate worse
3668 // vsetvli code.
3669 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3670 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3671 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3672 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3673 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3674 }
3675 if (StepDenominator != 1) {
3676 SDValue SplatStep =
3677 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3678 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3679 }
3680 if (Addend != 0 || Negate) {
3681 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3682 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3683 VID);
3684 }
3685 if (VT.isFloatingPoint()) {
3686 // TODO: Use vfwcvt to reduce register pressure.
3687 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3688 }
3689 return VID;
3690 }
3691 }
3692
3693 // For very small build_vectors, use a single scalar insert of a constant.
3694 // TODO: Base this on constant rematerialization cost, not size.
3695 const unsigned EltBitSize = VT.getScalarSizeInBits();
3696 if (VT.getSizeInBits() <= 32 &&
3697 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3698 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3699 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3700 "Unexpected sequence type");
3701 // If we can use the original VL with the modified element type, this
3702 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3703 // be moved into InsertVSETVLI?
3704 unsigned ViaVecLen =
3705 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3706 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3707
3708 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3709 uint64_t SplatValue = 0;
3710 // Construct the amalgamated value at this larger vector type.
3711 for (const auto &OpIdx : enumerate(Op->op_values())) {
3712 const auto &SeqV = OpIdx.value();
3713 if (!SeqV.isUndef())
3714 SplatValue |=
3715 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3716 }
3717
3718 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3719 // achieve better constant materialization.
3720 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3721 SplatValue = SignExtend64<32>(SplatValue);
3722
3723 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3724 DAG.getUNDEF(ViaVecVT),
3725 DAG.getConstant(SplatValue, DL, XLenVT),
3726 DAG.getVectorIdxConstant(0, DL));
3727 if (ViaVecLen != 1)
3728 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3729 MVT::getVectorVT(ViaIntVT, 1), Vec,
3730 DAG.getConstant(0, DL, XLenVT));
3731 return DAG.getBitcast(VT, Vec);
3732 }
3733
3734
3735 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3736 // when re-interpreted as a vector with a larger element type. For example,
3737 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3738 // could be instead splat as
3739 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3740 // TODO: This optimization could also work on non-constant splats, but it
3741 // would require bit-manipulation instructions to construct the splat value.
3742 SmallVector<SDValue> Sequence;
3743 const auto *BV = cast<BuildVectorSDNode>(Op);
3744 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3745 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3746 BV->getRepeatedSequence(Sequence) &&
3747 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3748 unsigned SeqLen = Sequence.size();
3749 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3750 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3751 ViaIntVT == MVT::i64) &&
3752 "Unexpected sequence type");
3753
3754 // If we can use the original VL with the modified element type, this
3755 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3756 // be moved into InsertVSETVLI?
3757 const unsigned RequiredVL = NumElts / SeqLen;
3758 const unsigned ViaVecLen =
3759 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3760 NumElts : RequiredVL;
3761 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3762
3763 unsigned EltIdx = 0;
3764 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3765 uint64_t SplatValue = 0;
3766 // Construct the amalgamated value which can be splatted as this larger
3767 // vector type.
3768 for (const auto &SeqV : Sequence) {
3769 if (!SeqV.isUndef())
3770 SplatValue |=
3771 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3772 EltIdx++;
3773 }
3774
3775 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3776 // achieve better constant materialization.
3777 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3778 SplatValue = SignExtend64<32>(SplatValue);
3779
3780 // Since we can't introduce illegal i64 types at this stage, we can only
3781 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3782 // way we can use RVV instructions to splat.
3783 assert((ViaIntVT.bitsLE(XLenVT) ||
3784 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3785 "Unexpected bitcast sequence");
3786 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3787 SDValue ViaVL =
3788 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3789 MVT ViaContainerVT =
3790 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3791 SDValue Splat =
3792 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3793 DAG.getUNDEF(ViaContainerVT),
3794 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3795 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3796 if (ViaVecLen != RequiredVL)
3797 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3798 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3799 DAG.getConstant(0, DL, XLenVT));
3800 return DAG.getBitcast(VT, Splat);
3801 }
3802 }
3803
3804 // If the number of sign bits allows, see if we can lower as a <N x i8>.
3805 // Our main goal here is to reduce LMUL (and thus work) required to
3806 // build the constant, but we will also narrow if the resulting
3807 // narrow vector is known to materialize cheaply.
3808 // TODO: We really should be costing the smaller vector. There are
3809 // profitable cases this misses.
3810 if (EltBitSize > 8 && VT.isInteger() &&
3811 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3812 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3813 if (EltBitSize - SignBits < 8) {
3814 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3815 DL, Op->ops());
3816 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3817 Source, DAG, Subtarget);
3818 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3819 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3820 }
3821 }
3822
3823 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3824 return Res;
3825
3826 // For constant vectors, use generic constant pool lowering. Otherwise,
3827 // we'd have to materialize constants in GPRs just to move them into the
3828 // vector.
3829 return SDValue();
3830}
3831
3832static unsigned getPACKOpcode(unsigned DestBW,
3833 const RISCVSubtarget &Subtarget) {
3834 switch (DestBW) {
3835 default:
3836 llvm_unreachable("Unsupported pack size");
3837 case 16:
3838 return RISCV::PACKH;
3839 case 32:
3840 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3841 case 64:
3842 assert(Subtarget.is64Bit());
3843 return RISCV::PACK;
3844 }
3845}
3846
3847/// Double the element size of the build vector to reduce the number
3848/// of vslide1down in the build vector chain. In the worst case, this
3849/// trades three scalar operations for 1 vector operation. Scalar
3850/// operations are generally lower latency, and for out-of-order cores
3851/// we also benefit from additional parallelism.
3852static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3853 const RISCVSubtarget &Subtarget) {
3854 SDLoc DL(Op);
3855 MVT VT = Op.getSimpleValueType();
3856 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3857 MVT ElemVT = VT.getVectorElementType();
3858 if (!ElemVT.isInteger())
3859 return SDValue();
3860
3861 // TODO: Relax these architectural restrictions, possibly with costing
3862 // of the actual instructions required.
3863 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3864 return SDValue();
3865
3866 unsigned NumElts = VT.getVectorNumElements();
3867 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
3868 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
3869 NumElts % 2 != 0)
3870 return SDValue();
3871
3872 // Produce [B,A] packed into a type twice as wide. Note that all
3873 // scalars are XLenVT, possibly masked (see below).
3874 MVT XLenVT = Subtarget.getXLenVT();
3875 SDValue Mask = DAG.getConstant(
3876 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
3877 auto pack = [&](SDValue A, SDValue B) {
3878 // Bias the scheduling of the inserted operations to near the
3879 // definition of the element - this tends to reduce register
3880 // pressure overall.
3881 SDLoc ElemDL(B);
3882 if (Subtarget.hasStdExtZbkb())
3883 // Note that we're relying on the high bits of the result being
3884 // don't care. For PACKW, the result is *sign* extended.
3885 return SDValue(
3886 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
3887 ElemDL, XLenVT, A, B),
3888 0);
3889
3890 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
3891 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
3892 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
3893 SDNodeFlags Flags;
3894 Flags.setDisjoint(true);
3895 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
3896 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt), Flags);
3897 };
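    // For example, packing two i8 elements A = 0x12 and B = 0x34 without
    // Zbkb produces (0x12 & 0xff) | ((0x34 & 0xff) << 8) = 0x3412, i.e. A in
    // the low half and B in the high half of the new i16 element.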
3898
3899 SmallVector<SDValue> NewOperands;
3900 NewOperands.reserve(NumElts / 2);
3901 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
3902 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
3903 assert(NumElts == NewOperands.size() * 2);
3904 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
3905 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
3906 return DAG.getNode(ISD::BITCAST, DL, VT,
3907 DAG.getBuildVector(WideVecVT, DL, NewOperands));
3908}
3909
3910// Convert a vXf16 build_vector to a vXi16 build_vector with bitcasts.
3911static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {
3912 MVT VT = Op.getSimpleValueType();
3913 MVT IVT = VT.changeVectorElementType(MVT::i16);
3914 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
3915 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
3916 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
3917 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);
3918 return DAG.getBitcast(VT, Res);
3919}
3920
3921static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3922 const RISCVSubtarget &Subtarget) {
3923 MVT VT = Op.getSimpleValueType();
3924 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3925
3926 // If we don't have scalar f16, we need to bitcast to an i16 vector.
3927 if (VT.getVectorElementType() == MVT::f16 &&
3928 !Subtarget.hasStdExtZfhmin())
3929 return lowerBUILD_VECTORvXf16(Op, DAG);
3930
3931 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3932 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3933 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3934
3935 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3936
3937 SDLoc DL(Op);
3938 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3939
3940 MVT XLenVT = Subtarget.getXLenVT();
3941
3942 if (VT.getVectorElementType() == MVT::i1) {
3943 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3944 // vector type, we have a legal equivalently-sized i8 type, so we can use
3945 // that.
3946 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3947 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3948
3949 SDValue WideVec;
3950 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3951 // For a splat, perform a scalar truncate before creating the wider
3952 // vector.
3953 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3954 DAG.getConstant(1, DL, Splat.getValueType()));
3955 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3956 } else {
3957 SmallVector<SDValue, 8> Ops(Op->op_values());
3958 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3959 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3960 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3961 }
3962
3963 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3964 }
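  // For example, a non-constant v4i1 build_vector <a, b, c, d> is rebuilt as
  // a v4i8 build_vector of the same operands, ANDed with 1 to clear the upper
  // bits, and then compared not-equal to zero to produce the mask value.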
3965
3966 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3967 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3968 return Gather;
3969 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3970 : RISCVISD::VMV_V_X_VL;
3971 if (!VT.isFloatingPoint())
3972 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3973 Splat =
3974 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3975 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3976 }
3977
3978 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3979 return Res;
3980
3981 // If we're compiling for an exact VLEN value, we can split our work per
3982 // register in the register group.
3983 if (const auto VLen = Subtarget.getRealVLen();
3984 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3985 MVT ElemVT = VT.getVectorElementType();
3986 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3987 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3988 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3989 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3990 assert(M1VT == getLMUL1VT(M1VT));
3991
3992 // The following semantically builds up a fixed length concat_vector
3993 // of the component build_vectors. We eagerly lower to scalable and
3994 // insert_subvector here to avoid DAG combining it back to a large
3995 // build_vector.
3996 SmallVector<SDValue> BuildVectorOps(Op->ops());
3997 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3998 SDValue Vec = DAG.getUNDEF(ContainerVT);
3999 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4000 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4001 SDValue SubBV =
4002 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4003 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4004 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4005 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4006 DAG.getVectorIdxConstant(InsertIdx, DL));
4007 }
4008 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4009 }
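  // For example, with an exact VLEN of 128, a v8i32 build_vector (an m2
  // value) is assembled as two v4i32 (m1) build_vectors, each lowered to a
  // scalable M1 value and inserted into consecutive registers of the group.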
4010
4011 // If we're about to resort to vslide1down (or stack usage), pack our
4012 // elements into the widest scalar type we can. This will force a VL/VTYPE
4013 // toggle, but reduces the critical path, the number of vslide1down ops
4014 // required, and possibly enables scalar folds of the values.
4015 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4016 return Res;
4017
4018 // For m1 vectors, if we have non-undef values in both halves of our vector,
4019 // split the vector into low and high halves, build them separately, then
4020 // use a vselect to combine them. For long vectors, this cuts the critical
4021 // path of the vslide1down sequence in half, and gives us an opportunity
4022 // to special case each half independently. Note that we don't change the
4023 // length of the sub-vectors here, so if both fallback to the generic
4024 // vslide1down path, we should be able to fold the vselect into the final
4025 // vslidedown (for the undef tail) for the first half w/ masking.
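  // For example, a fully-defined v16i8 build_vector is built as
  // <a0..a7, undef x 8> and <undef x 8, a8..a15>, and the two halves are then
  // merged with a vselect whose mask selects the first half for elements 0-7.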
4026 unsigned NumElts = VT.getVectorNumElements();
4027 unsigned NumUndefElts =
4028 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4029 unsigned NumDefElts = NumElts - NumUndefElts;
4030 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4031 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4032 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4033 SmallVector<SDValue> MaskVals;
4034 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4035 SubVecAOps.reserve(NumElts);
4036 SubVecBOps.reserve(NumElts);
4037 for (unsigned i = 0; i < NumElts; i++) {
4038 SDValue Elem = Op->getOperand(i);
4039 if (i < NumElts / 2) {
4040 SubVecAOps.push_back(Elem);
4041 SubVecBOps.push_back(UndefElem);
4042 } else {
4043 SubVecAOps.push_back(UndefElem);
4044 SubVecBOps.push_back(Elem);
4045 }
4046 bool SelectMaskVal = (i < NumElts / 2);
4047 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4048 }
4049 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4050 MaskVals.size() == NumElts);
4051
4052 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4053 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4054 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4055 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4056 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4057 }
4058
4059 // Cap the cost at a value linear to the number of elements in the vector.
4060 // The default lowering is to use the stack. The vector store + scalar loads
4061 // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
4062 // being (at least) linear in LMUL. As a result, using the vslidedown
4063 // lowering for every element ends up being VL*LMUL.
4064 // TODO: Should we be directly costing the stack alternative? Doing so might
4065 // give us a more accurate upper bound.
4066 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4067
4068 // TODO: unify with TTI getSlideCost.
4069 InstructionCost PerSlideCost = 1;
4070 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4071 default: break;
4072 case RISCVII::VLMUL::LMUL_2:
4073 PerSlideCost = 2;
4074 break;
4075 case RISCVII::VLMUL::LMUL_4:
4076 PerSlideCost = 4;
4077 break;
4078 case RISCVII::VLMUL::LMUL_8:
4079 PerSlideCost = 8;
4080 break;
4081 }
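  // For example, with a 128-bit minimum VLEN a fully-defined v64i8
  // build_vector is LMUL=4: its budget is 128 but the 64 slides would cost
  // 256, so we bail out to the default stack-based lowering, whereas a v16i8
  // (LMUL=1) costs 16 against a budget of 32 and is emitted as slides below.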
4082
4083 // TODO: Should we be using the build instseq then cost + evaluate scheme
4084 // we use for integer constants here?
4085 unsigned UndefCount = 0;
4086 for (const SDValue &V : Op->ops()) {
4087 if (V.isUndef()) {
4088 UndefCount++;
4089 continue;
4090 }
4091 if (UndefCount) {
4092 LinearBudget -= PerSlideCost;
4093 UndefCount = 0;
4094 }
4095 LinearBudget -= PerSlideCost;
4096 }
4097 if (UndefCount) {
4098 LinearBudget -= PerSlideCost;
4099 }
4100
4101 if (LinearBudget < 0)
4102 return SDValue();
4103
4104 assert((!VT.isFloatingPoint() ||
4105 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4106 "Illegal type which will result in reserved encoding");
4107
4108 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4109
4110 SDValue Vec;
4111 UndefCount = 0;
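  // For example, a non-constant v4i32 <a, b, c, d> starts with a splat of a
  // (typically a vmv.v.x) and then applies three vslide1down.vx operations
  // with b, c and d; each slide shifts the existing elements down by one and
  // inserts the new scalar at the top, leaving <a, b, c, d> after the final
  // slide.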
4112 for (SDValue V : Op->ops()) {
4113 if (V.isUndef()) {
4114 UndefCount++;
4115 continue;
4116 }
4117
4118 // Start our sequence with a TA splat in the hopes that hardware is able to
4119 // recognize there's no dependency on the prior value of our temporary
4120 // register.
4121 if (!Vec) {
4122 Vec = DAG.getSplatVector(VT, DL, V);
4123 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4124 UndefCount = 0;
4125 continue;
4126 }
4127
4128 if (UndefCount) {
4129 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4130 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4131 Vec, Offset, Mask, VL, Policy);
4132 UndefCount = 0;
4133 }
4134 auto OpCode =
4135 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4136 if (!VT.isFloatingPoint())
4137 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4138 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4139 V, Mask, VL);
4140 }
4141 if (UndefCount) {
4142 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4143 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4144 Vec, Offset, Mask, VL, Policy);
4145 }
4146 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4147}
4148
4149static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4150 SDValue Lo, SDValue Hi, SDValue VL,
4151 SelectionDAG &DAG) {
4152 if (!Passthru)
4153 Passthru = DAG.getUNDEF(VT);
4154 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4155 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4156 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4157 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4158 // node in order to try and match RVV vector/scalar instructions.
4159 if ((LoC >> 31) == HiC)
4160 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4161
4162 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4163 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4164 // vlmax vsetvli or vsetivli to change the VL.
4165 // FIXME: Support larger constants?
4166 // FIXME: Support non-constant VLs by saturating?
4167 if (LoC == HiC) {
4168 SDValue NewVL;
4169 if (isAllOnesConstant(VL) ||
4170 (isa<RegisterSDNode>(VL) &&
4171 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4172 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4173 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4174 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4175
4176 if (NewVL) {
4177 MVT InterVT =
4178 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4179 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4180 DAG.getUNDEF(InterVT), Lo, NewVL);
4181 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4182 }
4183 }
4184 }
4185
4186 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4187 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4188 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4189 Hi.getConstantOperandVal(1) == 31)
4190 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4191
4192 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4193 // even if it might be sign extended.
4194 if (Hi.isUndef())
4195 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4196
4197 // Fall back to a stack store and stride x0 vector load.
4198 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4199 Hi, VL);
4200}
4201
4202// Called by type legalization to handle splat of i64 on RV32.
4203// FIXME: We can optimize this when the type has sign or zero bits in one
4204// of the halves.
4205static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4206 SDValue Scalar, SDValue VL,
4207 SelectionDAG &DAG) {
4208 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4209 SDValue Lo, Hi;
4210 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4211 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4212}
4213
4214// This function lowers a splat of a scalar operand Splat with the vector
4215// length VL. It ensures the final sequence is type legal, which is useful when
4216// lowering a splat after type legalization.
4217static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4218 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4219 const RISCVSubtarget &Subtarget) {
4220 bool HasPassthru = Passthru && !Passthru.isUndef();
4221 if (!HasPassthru && !Passthru)
4222 Passthru = DAG.getUNDEF(VT);
4223 if (VT.isFloatingPoint())
4224 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4225
4226 MVT XLenVT = Subtarget.getXLenVT();
4227
4228 // Simplest case is that the operand needs to be promoted to XLenVT.
4229 if (Scalar.getValueType().bitsLE(XLenVT)) {
4230 // If the operand is a constant, sign extend to increase our chances
4231 // of being able to use a .vi instruction. ANY_EXTEND would become a
4232 // zero extend and the simm5 check in isel would fail.
4233 // FIXME: Should we ignore the upper bits in isel instead?
4234 unsigned ExtOpc =
4235 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4236 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4237 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4238 }
4239
4240 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4241 "Unexpected scalar for splat lowering!");
4242
4243 if (isOneConstant(VL) && isNullConstant(Scalar))
4244 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4245 DAG.getConstant(0, DL, XLenVT), VL);
4246
4247 // Otherwise use the more complicated splatting algorithm.
4248 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4249}
4250
4251// This function lowers an insert of a scalar operand Scalar into lane
4252// 0 of the vector regardless of the value of VL. The contents of the
4253// remaining lanes of the result vector are unspecified. VL is assumed
4254// to be non-zero.
4255static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4256 const SDLoc &DL, SelectionDAG &DAG,
4257 const RISCVSubtarget &Subtarget) {
4258 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4259
4260 const MVT XLenVT = Subtarget.getXLenVT();
4261 SDValue Passthru = DAG.getUNDEF(VT);
4262
4263 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4264 isNullConstant(Scalar.getOperand(1))) {
4265 SDValue ExtractedVal = Scalar.getOperand(0);
4266 // The element types must be the same.
4267 if (ExtractedVal.getValueType().getVectorElementType() ==
4268 VT.getVectorElementType()) {
4269 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4270 MVT ExtractedContainerVT = ExtractedVT;
4271 if (ExtractedContainerVT.isFixedLengthVector()) {
4272 ExtractedContainerVT = getContainerForFixedLengthVector(
4273 DAG, ExtractedContainerVT, Subtarget);
4274 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4275 ExtractedVal, DAG, Subtarget);
4276 }
4277 if (ExtractedContainerVT.bitsLE(VT))
4278 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4279 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4280 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4281 DAG.getVectorIdxConstant(0, DL));
4282 }
4283 }
4284
4285
4286 if (VT.isFloatingPoint())
4287 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4288 DAG.getUNDEF(VT), Scalar, VL);
4289
4290 // Avoid the tricky legalization cases by falling back to using the
4291 // splat code which already handles it gracefully.
4292 if (!Scalar.getValueType().bitsLE(XLenVT))
4293 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4294 DAG.getConstant(1, DL, XLenVT),
4295 VT, DL, DAG, Subtarget);
4296
4297 // If the operand is a constant, sign extend to increase our chances
4298 // of being able to use a .vi instruction. ANY_EXTEND would become a
4299 // zero extend and the simm5 check in isel would fail.
4300 // FIXME: Should we ignore the upper bits in isel instead?
4301 unsigned ExtOpc =
4302 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4303 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4304 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4305 DAG.getUNDEF(VT), Scalar, VL);
4306}
4307
4308 // Does this shuffle extract either the even or odd elements of a vector?
4309// That is, specifically, either (a) or (b) below.
4310// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4311// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4312// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4313// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4314 // Returns {Src Vector, Even Elements} on success.
4315static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4316 SDValue V2, ArrayRef<int> Mask,
4317 const RISCVSubtarget &Subtarget) {
4318 // Need to be able to widen the vector.
4319 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4320 return false;
4321
4322 // Both inputs must be extracts.
4323 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4324 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4325 return false;
4326
4327 // Extracting from the same source.
4328 SDValue Src = V1.getOperand(0);
4329 if (Src != V2.getOperand(0))
4330 return false;
4331
4332 // Src needs to have twice the number of elements.
4333 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4334 return false;
4335
4336 // The extracts must extract the two halves of the source.
4337 if (V1.getConstantOperandVal(1) != 0 ||
4338 V2.getConstantOperandVal(1) != Mask.size())
4339 return false;
4340
4341 // First index must be the first even or odd element from V1.
4342 if (Mask[0] != 0 && Mask[0] != 1)
4343 return false;
4344
4345 // The others must increase by 2 each time.
4346 // TODO: Support undef elements?
4347 for (unsigned i = 1; i != Mask.size(); ++i)
4348 if (Mask[i] != Mask[i - 1] + 2)
4349 return false;
4350
4351 return true;
4352}
4353
4354/// Is this shuffle interleaving contiguous elements from one vector into the
4355/// even elements and contiguous elements from another vector into the odd
4356/// elements. \p EvenSrc will contain the element that should be in the first
4357/// even element. \p OddSrc will contain the element that should be in the first
4358/// odd element. These can be the first element in a source or the element half
4359/// way through the source.
4360static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4361 int &OddSrc, const RISCVSubtarget &Subtarget) {
4362 // We need to be able to widen elements to the next larger integer type.
4363 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4364 return false;
4365
4366 int Size = Mask.size();
4367 int NumElts = VT.getVectorNumElements();
4368 assert(Size == (int)NumElts && "Unexpected mask size");
4369
4370 SmallVector<unsigned, 2> StartIndexes;
4371 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4372 return false;
4373
4374 EvenSrc = StartIndexes[0];
4375 OddSrc = StartIndexes[1];
4376
4377 // One source should be low half of first vector.
4378 if (EvenSrc != 0 && OddSrc != 0)
4379 return false;
4380
4381 // Subvectors will be extracted from either the start of the two input
4382 // vectors, or from the start and middle of the first vector if it's a unary
4383 // interleave.
4384 // In both cases, HalfNumElts will be extracted.
4385 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4386 // we'll create an illegal extract_subvector.
4387 // FIXME: We could support other values using a slidedown first.
4388 int HalfNumElts = NumElts / 2;
4389 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4390}
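// For example, for v8i8 the mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves the
// low half of the first source with the low half of the second, giving
// EvenSrc = 0 and OddSrc = 8; both are multiples of HalfNumElts (4), so the
// match succeeds.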
4391
4392/// Match shuffles that concatenate two vectors, rotate the concatenation,
4393/// and then extract the original number of elements from the rotated result.
4394/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4395/// returned rotation amount is for a rotate right, where elements move from
4396/// higher elements to lower elements. \p LoSrc indicates the first source
4397/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4398/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4399/// 0 or 1 if a rotation is found.
4400///
4401/// NOTE: We talk about rotate to the right which matches how bit shift and
4402/// rotate instructions are described where LSBs are on the right, but LLVM IR
4403/// and the table below write vectors with the lowest elements on the left.
4404static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4405 int Size = Mask.size();
4406
4407 // We need to detect various ways of spelling a rotation:
4408 // [11, 12, 13, 14, 15, 0, 1, 2]
4409 // [-1, 12, 13, 14, -1, -1, 1, -1]
4410 // [-1, -1, -1, -1, -1, -1, 1, 2]
4411 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4412 // [-1, 4, 5, 6, -1, -1, 9, -1]
4413 // [-1, 4, 5, 6, -1, -1, -1, -1]
4414 int Rotation = 0;
4415 LoSrc = -1;
4416 HiSrc = -1;
4417 for (int i = 0; i != Size; ++i) {
4418 int M = Mask[i];
4419 if (M < 0)
4420 continue;
4421
4422 // Determine where a rotate vector would have started.
4423 int StartIdx = i - (M % Size);
4424 // The identity rotation isn't interesting, stop.
4425 if (StartIdx == 0)
4426 return -1;
4427
4428 // If we found the tail of a vector the rotation must be the missing
4429 // front. If we found the head of a vector, it must be how much of the
4430 // head.
4431 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4432
4433 if (Rotation == 0)
4434 Rotation = CandidateRotation;
4435 else if (Rotation != CandidateRotation)
4436 // The rotations don't match, so we can't match this mask.
4437 return -1;
4438
4439 // Compute which value this mask is pointing at.
4440 int MaskSrc = M < Size ? 0 : 1;
4441
4442 // Compute which of the two target values this index should be assigned to.
4443 // This reflects whether the high elements are remaining or the low elements
4444 // are remaining.
4445 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4446
4447 // Either set up this value if we've not encountered it before, or check
4448 // that it remains consistent.
4449 if (TargetSrc < 0)
4450 TargetSrc = MaskSrc;
4451 else if (TargetSrc != MaskSrc)
4452 // This may be a rotation, but it pulls from the inputs in some
4453 // unsupported interleaving.
4454 return -1;
4455 }
4456
4457 // Check that we successfully analyzed the mask, and normalize the results.
4458 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4459 assert((LoSrc >= 0 || HiSrc >= 0) &&
4460 "Failed to find a rotated input vector!");
4461
4462 return Rotation;
4463}
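// For example, with the first mask above, <11, 12, 13, 14, 15, 0, 1, 2>, this
// returns a rotation of 3 with HiSrc = 1 and LoSrc = 0: the caller slides the
// second source down by 3 and slides the first source up by
// NumElts - 3 = 5 to assemble the result.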
4464
4465// Lower a deinterleave shuffle to vnsrl.
4466// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4467// -> [p, q, r, s] (EvenElts == false)
4468// VT is the type of the vector to return, <[vscale x ]n x ty>
4469// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4470static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4471 bool EvenElts,
4472 const RISCVSubtarget &Subtarget,
4473 SelectionDAG &DAG) {
4474 // The result is a vector of type <m x n x ty>
4475 MVT ContainerVT = VT;
4476 // Convert fixed vectors to scalable if needed
4477 if (ContainerVT.isFixedLengthVector()) {
4478 assert(Src.getSimpleValueType().isFixedLengthVector());
4479 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4480
4481 // The source is a vector of type <m x n*2 x ty>
4482 MVT SrcContainerVT =
4483 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4484 ContainerVT.getVectorElementCount() * 2);
4485 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4486 }
4487
4488 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4489
4490 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4491 // This also converts FP to int.
4492 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4493 MVT WideSrcContainerVT = MVT::getVectorVT(
4494 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4495 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4496
4497 // The integer version of the container type.
4498 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4499
4500 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4501 // the original element size.
4502 unsigned Shift = EvenElts ? 0 : EltBits;
4503 SDValue SplatShift = DAG.getNode(
4504 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4505 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4506 SDValue Res =
4507 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4508 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4509 // Cast back to FP if needed.
4510 Res = DAG.getBitcast(ContainerVT, Res);
4511
4512 if (VT.isFixedLengthVector())
4513 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4514 return Res;
4515}
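// For example, extracting the even elements of a v8i8 source reinterprets it
// as a v4i16 vector and narrows with a vnsrl by 0, while the odd elements use
// a vnsrl by 8; the narrowing shift keeps either the low or the high byte of
// each 16-bit element, yielding the v4i8 result.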
4516
4517// Lower the following shuffle to vslidedown.
4518// a)
4519// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4520// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4521// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4522// b)
4523// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4524// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4525// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4526// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4527// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4528// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4529static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4530 SDValue V1, SDValue V2,
4531 ArrayRef<int> Mask,
4532 const RISCVSubtarget &Subtarget,
4533 SelectionDAG &DAG) {
4534 auto findNonEXTRACT_SUBVECTORParent =
4535 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4536 uint64_t Offset = 0;
4537 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4538 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4539 // a scalable vector. But we don't want to match the case.
4540 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4541 Offset += Parent.getConstantOperandVal(1);
4542 Parent = Parent.getOperand(0);
4543 }
4544 return std::make_pair(Parent, Offset);
4545 };
4546
4547 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4548 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4549
4550 // Extracting from the same source.
4551 SDValue Src = V1Src;
4552 if (Src != V2Src)
4553 return SDValue();
4554
4555 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4556 SmallVector<int, 16> NewMask(Mask);
4557 for (size_t i = 0; i != NewMask.size(); ++i) {
4558 if (NewMask[i] == -1)
4559 continue;
4560
4561 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4562 NewMask[i] = NewMask[i] + V1IndexOffset;
4563 } else {
4564 // Minus NewMask.size() is needed. Otherwise, the b case would be
4565 // <5,6,7,12> instead of <5,6,7,8>.
4566 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4567 }
4568 }
4569
4570 // First index must be known and non-zero. It will be used as the slidedown
4571 // amount.
4572 if (NewMask[0] <= 0)
4573 return SDValue();
4574
4575 // NewMask is also continuous.
4576 for (unsigned i = 1; i != NewMask.size(); ++i)
4577 if (NewMask[i - 1] + 1 != NewMask[i])
4578 return SDValue();
4579
4580 MVT XLenVT = Subtarget.getXLenVT();
4581 MVT SrcVT = Src.getSimpleValueType();
4582 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4583 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4584 SDValue Slidedown =
4585 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4586 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4587 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4588 return DAG.getNode(
4589 ISD::EXTRACT_SUBVECTOR, DL, VT,
4590 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4591 DAG.getConstant(0, DL, XLenVT));
4592}
4593
4594// Because vslideup leaves the destination elements at the start intact, we can
4595// use it to perform shuffles that insert subvectors:
4596//
4597// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4598// ->
4599// vsetvli zero, 8, e8, mf2, ta, ma
4600// vslideup.vi v8, v9, 4
4601//
4602// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4603// ->
4604// vsetvli zero, 5, e8, mf2, tu, ma
4605 // vslideup.vi v8, v9, 2
4606static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4607 SDValue V1, SDValue V2,
4608 ArrayRef<int> Mask,
4609 const RISCVSubtarget &Subtarget,
4610 SelectionDAG &DAG) {
4611 unsigned NumElts = VT.getVectorNumElements();
4612 int NumSubElts, Index;
4613 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4614 Index))
4615 return SDValue();
4616
4617 bool OpsSwapped = Mask[Index] < (int)NumElts;
4618 SDValue InPlace = OpsSwapped ? V2 : V1;
4619 SDValue ToInsert = OpsSwapped ? V1 : V2;
4620
4621 MVT XLenVT = Subtarget.getXLenVT();
4622 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4623 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4624 // We slide up by the index that the subvector is being inserted at, and set
4625 // VL to the index + the number of elements being inserted.
4626 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4627 // If we're adding a suffix to the in place vector, i.e. inserting right
4628 // up to the very end of it, then we don't actually care about the tail.
4629 if (NumSubElts + Index >= (int)NumElts)
4630 Policy |= RISCVII::TAIL_AGNOSTIC;
4631
4632 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4633 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4634 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4635
4636 SDValue Res;
4637 // If we're inserting into the lowest elements, use a tail undisturbed
4638 // vmv.v.v.
4639 if (Index == 0)
4640 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4641 VL);
4642 else
4643 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4644 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4645 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4646}
4647
4648/// Match v(f)slide1up/down idioms. These operations involve sliding
4649/// N-1 elements to make room for an inserted scalar at one end.
4650static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4651 SDValue V1, SDValue V2,
4652 ArrayRef<int> Mask,
4653 const RISCVSubtarget &Subtarget,
4654 SelectionDAG &DAG) {
4655 bool OpsSwapped = false;
4656 if (!isa<BuildVectorSDNode>(V1)) {
4657 if (!isa<BuildVectorSDNode>(V2))
4658 return SDValue();
4659 std::swap(V1, V2);
4660 OpsSwapped = true;
4661 }
4662 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4663 if (!Splat)
4664 return SDValue();
4665
4666 // Return true if the mask could describe a slide of Mask.size() - 1
4667 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4668 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4669 const unsigned S = (Offset > 0) ? 0 : -Offset;
4670 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4671 for (unsigned i = S; i != E; ++i)
4672 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4673 return false;
4674 return true;
4675 };
4676
4677 const unsigned NumElts = VT.getVectorNumElements();
4678 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4679 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4680 return SDValue();
4681
4682 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4683 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4684 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4685 return SDValue();
4686
4687 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4688 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4689 auto OpCode = IsVSlidedown ?
4690 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4691 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4692 if (!VT.isFloatingPoint())
4693 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4694 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4695 DAG.getUNDEF(ContainerVT),
4696 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4697 Splat, TrueMask, VL);
4698 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4699}
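// For example, shuffling (splat s, v) for v4i32 with the mask <5, 6, 7, 0>
// matches the slide1down form: the result keeps elements 1..3 of v and
// appends s, so it lowers to a single vslide1down.vx of v by the scalar s.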
4700
4701// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4702// to create an interleaved vector of <[vscale x] n*2 x ty>.
4703// This requires that the size of ty is less than the subtarget's maximum ELEN.
4704static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4705 const SDLoc &DL, SelectionDAG &DAG,
4706 const RISCVSubtarget &Subtarget) {
4707 MVT VecVT = EvenV.getSimpleValueType();
4708 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4709 // Convert fixed vectors to scalable if needed
4710 if (VecContainerVT.isFixedLengthVector()) {
4711 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4712 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4713 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4714 }
4715
4716 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4717
4718 // We're working with a vector of the same size as the resulting
4719 // interleaved vector, but with half the number of elements and
4720 // twice the SEW (Hence the restriction on not using the maximum
4721 // ELEN)
4722 MVT WideVT =
4723 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4724 VecVT.getVectorElementCount());
4725 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4726 if (WideContainerVT.isFixedLengthVector())
4727 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4728
4729 // Bitcast the input vectors to integers in case they are FP
4730 VecContainerVT = VecContainerVT.changeTypeToInteger();
4731 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4732 OddV = DAG.getBitcast(VecContainerVT, OddV);
4733
4734 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4735 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4736
4737 SDValue Interleaved;
4738 if (OddV.isUndef()) {
4739 // If OddV is undef, this is a zero extend.
4740 // FIXME: Not only does this optimize the code, it fixes some correctness
4741 // issues because MIR does not have freeze.
4742 Interleaved =
4743 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4744 } else if (Subtarget.hasStdExtZvbb()) {
4745 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4746 SDValue OffsetVec =
4747 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4748 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4749 OffsetVec, Passthru, Mask, VL);
4750 if (!EvenV.isUndef())
4751 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4752 Interleaved, EvenV, Passthru, Mask, VL);
4753 } else if (EvenV.isUndef()) {
4754 Interleaved =
4755 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4756
4757 SDValue OffsetVec =
4758 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4759 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4760 Interleaved, OffsetVec, Passthru, Mask, VL);
4761 } else {
4762 // FIXME: We should freeze the odd vector here. We already handled the case
4763 // of provably undef/poison above.
4764
4765 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4766 // vwaddu.vv
4767 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4768 OddV, Passthru, Mask, VL);
4769
4770 // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. OddV * 0xff...ff
4771 SDValue AllOnesVec = DAG.getSplatVector(
4772 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4773 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4774 OddV, AllOnesVec, Passthru, Mask, VL);
4775
4776 // Add the two together so we get
4777 // (OddV * 0xff...ff) + (OddV + EvenV)
4778 // = (OddV * 0x100...00) + EvenV
4779 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4780 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4781 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4782 Interleaved, OddsMul, Passthru, Mask, VL);
4783 }
4784
4785 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4786 MVT ResultContainerVT = MVT::getVectorVT(
4787 VecVT.getVectorElementType(), // Make sure to use original type
4788 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4789 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4790
4791 // Convert back to a fixed vector if needed
4792 MVT ResultVT =
4793 MVT::getVectorVT(VecVT.getVectorElementType(),
4794 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4795 if (ResultVT.isFixedLengthVector())
4796 Interleaved =
4797 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4798
4799 return Interleaved;
4800}
4801
4802// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4803// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4804static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4805 SelectionDAG &DAG,
4806 const RISCVSubtarget &Subtarget) {
4807 SDLoc DL(SVN);
4808 MVT VT = SVN->getSimpleValueType(0);
4809 SDValue V = SVN->getOperand(0);
4810 unsigned NumElts = VT.getVectorNumElements();
4811
4812 assert(VT.getVectorElementType() == MVT::i1);
4813
4814 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4815 SVN->getMask().size()) ||
4816 !SVN->getOperand(1).isUndef())
4817 return SDValue();
4818
4819 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4820 EVT ViaVT = EVT::getVectorVT(
4821 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4822 EVT ViaBitVT =
4823 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4824
4825 // If we don't have zvbb or the larger element type > ELEN, the operation will
4826 // be illegal.
4827 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4828 ViaVT) ||
4829 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4830 return SDValue();
4831
4832 // If the bit vector doesn't fit exactly into the larger element type, we need
4833 // to insert it into the larger vector and then shift up the reversed bits
4834 // afterwards to get rid of the gap introduced.
4835 if (ViaEltSize > NumElts)
4836 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4837 V, DAG.getVectorIdxConstant(0, DL));
4838
4839 SDValue Res =
4840 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4841
4842 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4843 // element type.
4844 if (ViaEltSize > NumElts)
4845 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4846 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4847
4848 Res = DAG.getBitcast(ViaBitVT, Res);
4849
4850 if (ViaEltSize > NumElts)
4851 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4852 DAG.getVectorIdxConstant(0, DL));
4853 return Res;
4854}
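// For example, reversing a v4i1 mask inserts it into a v8i1, bitreverses the
// value as a single i8 element (which leaves the four reversed bits in the
// high nibble), and then shifts right by 8 - 4 = 4 before extracting the v4i1
// result again.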
4855
4856static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4857 SelectionDAG &DAG,
4858 const RISCVSubtarget &Subtarget,
4859 MVT &RotateVT, unsigned &RotateAmt) {
4860 SDLoc DL(SVN);
4861
4862 EVT VT = SVN->getValueType(0);
4863 unsigned NumElts = VT.getVectorNumElements();
4864 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4865 unsigned NumSubElts;
4866 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4867 NumElts, NumSubElts, RotateAmt))
4868 return false;
4869 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4870 NumElts / NumSubElts);
4871
4872 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4873 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4874}
4875
4876// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4877// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4878// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4879static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4880 SelectionDAG &DAG,
4881 const RISCVSubtarget &Subtarget) {
4882 SDLoc DL(SVN);
4883
4884 EVT VT = SVN->getValueType(0);
4885 unsigned RotateAmt;
4886 MVT RotateVT;
4887 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4888 return SDValue();
4889
4890 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4891
4892 SDValue Rotate;
4893 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4894 // so canonicalize to vrev8.
4895 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4896 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4897 else
4898 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4899 DAG.getConstant(RotateAmt, DL, RotateVT));
4900
4901 return DAG.getBitcast(VT, Rotate);
4902}
4903
4904// If compiling with an exactly known VLEN, see if we can split a
4905// shuffle on m2 or larger into a small number of m1 sized shuffles
4906// which write each destination register exactly once.
4907static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4908 SelectionDAG &DAG,
4909 const RISCVSubtarget &Subtarget) {
4910 SDLoc DL(SVN);
4911 MVT VT = SVN->getSimpleValueType(0);
4912 SDValue V1 = SVN->getOperand(0);
4913 SDValue V2 = SVN->getOperand(1);
4914 ArrayRef<int> Mask = SVN->getMask();
4915 unsigned NumElts = VT.getVectorNumElements();
4916
4917 // If we don't know exact data layout, not much we can do. If this
4918 // is already m1 or smaller, no point in splitting further.
4919 const auto VLen = Subtarget.getRealVLen();
4920 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4921 return SDValue();
4922
4923 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4924 // expansion for.
4925 unsigned RotateAmt;
4926 MVT RotateVT;
4927 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4928 return SDValue();
4929
4930 MVT ElemVT = VT.getVectorElementType();
4931 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4932 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4933
4934 SmallVector<std::pair<int, SmallVector<int>>>
4935 OutMasks(VRegsPerSrc, {-1, {}});
4936
4937 // Check if our mask can be done as a 1-to-1 mapping from source
4938 // to destination registers in the group without needing to
4939 // write each destination more than once.
4940 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4941 int DstVecIdx = DstIdx / ElemsPerVReg;
4942 int DstSubIdx = DstIdx % ElemsPerVReg;
4943 int SrcIdx = Mask[DstIdx];
4944 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4945 continue;
4946 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4947 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4948 if (OutMasks[DstVecIdx].first == -1)
4949 OutMasks[DstVecIdx].first = SrcVecIdx;
4950 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4951 // Note: This case could easily be handled by keeping track of a chain
4952 // of source values and generating two element shuffles below. This is
4953 // less an implementation question, and more a profitability one.
4954 return SDValue();
4955
4956 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4957 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4958 }
4959
4960 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4961 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4962 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4963 assert(M1VT == getLMUL1VT(M1VT));
4964 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4965 SDValue Vec = DAG.getUNDEF(ContainerVT);
4966 // The following semantically builds up a fixed length concat_vector
4967 // of the component shuffle_vectors. We eagerly lower to scalable here
4968 // to avoid DAG combining it back to a large shuffle_vector again.
4969 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4970 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4971 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4972 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4973 if (SrcVecIdx == -1)
4974 continue;
4975 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4976 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4977 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4978 DAG.getVectorIdxConstant(ExtractIdx, DL));
4979 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4980 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4981 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4982 unsigned InsertIdx = DstVecIdx * NumOpElts;
4983 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4984 DAG.getVectorIdxConstant(InsertIdx, DL));
4985 }
4986 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4987}
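// For example, with an exact VLEN of 128, a v8i64 shuffle (an m4 operand) can
// be performed as up to four independent v2i64 (m1) shuffles, provided each
// destination register only needs elements from a single source register.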
4988
4989static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4990 const RISCVSubtarget &Subtarget) {
4991 SDValue V1 = Op.getOperand(0);
4992 SDValue V2 = Op.getOperand(1);
4993 SDLoc DL(Op);
4994 MVT XLenVT = Subtarget.getXLenVT();
4995 MVT VT = Op.getSimpleValueType();
4996 unsigned NumElts = VT.getVectorNumElements();
4997 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4998
4999 if (VT.getVectorElementType() == MVT::i1) {
5000 // Lower to a vror.vi of a larger element type if possible before we promote
5001 // i1s to i8s.
5002 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5003 return V;
5004 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5005 return V;
5006
5007 // Promote i1 shuffle to i8 shuffle.
5008 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5009 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5010 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5011 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5012 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5013 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5014 ISD::SETNE);
5015 }
5016
5017 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5018
5019 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5020
5021 if (SVN->isSplat()) {
5022 const int Lane = SVN->getSplatIndex();
5023 if (Lane >= 0) {
5024 MVT SVT = VT.getVectorElementType();
5025
5026 // Turn splatted vector load into a strided load with an X0 stride.
5027 SDValue V = V1;
5028 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5029 // with undef.
5030 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5031 int Offset = Lane;
5032 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5033 int OpElements =
5034 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5035 V = V.getOperand(Offset / OpElements);
5036 Offset %= OpElements;
5037 }
5038
5039 // We need to ensure the load isn't atomic or volatile.
5040 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5041 auto *Ld = cast<LoadSDNode>(V);
5042 Offset *= SVT.getStoreSize();
5043 SDValue NewAddr = DAG.getMemBasePlusOffset(
5044 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5045
5046 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5047 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5048 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5049 SDValue IntID =
5050 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5051 SDValue Ops[] = {Ld->getChain(),
5052 IntID,
5053 DAG.getUNDEF(ContainerVT),
5054 NewAddr,
5055 DAG.getRegister(RISCV::X0, XLenVT),
5056 VL};
5057 SDValue NewLoad = DAG.getMemIntrinsicNode(
5058 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5059 DAG.getMachineFunction().getMachineMemOperand(
5060 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5061 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5062 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5063 }
5064
5065 MVT SplatVT = ContainerVT;
5066
5067 // If we don't have Zfh, we need to use an integer scalar load.
5068 if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
5069 SVT = MVT::i16;
5070 SplatVT = ContainerVT.changeVectorElementType(SVT);
5071 }
5072
5073 // Otherwise use a scalar load and splat. This will give the best
5074 // opportunity to fold a splat into the operation. ISel can turn it into
5075 // the x0 strided load if we aren't able to fold away the select.
5076 if (SVT.isFloatingPoint())
5077 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5078 Ld->getPointerInfo().getWithOffset(Offset),
5079 Ld->getOriginalAlign(),
5080 Ld->getMemOperand()->getFlags());
5081 else
5082 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5083 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5084 Ld->getOriginalAlign(),
5085 Ld->getMemOperand()->getFlags());
5086 DAG.makeEquivalentMemoryOrdering(Ld, V);
5087
5088 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5089 : RISCVISD::VMV_V_X_VL;
5090 SDValue Splat =
5091 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5092 Splat = DAG.getBitcast(ContainerVT, Splat);
5093 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5094 }
5095
5096 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5097 assert(Lane < (int)NumElts && "Unexpected lane!");
5098 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5099 V1, DAG.getConstant(Lane, DL, XLenVT),
5100 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5101 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5102 }
5103 }
5104
5105 // For exact VLEN m2 or greater, try to split to m1 operations if we
5106 // can split cleanly.
5107 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5108 return V;
5109
5110 ArrayRef<int> Mask = SVN->getMask();
5111
5112 if (SDValue V =
5113 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5114 return V;
5115
5116 if (SDValue V =
5117 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5118 return V;
5119
5120 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5121 // available.
5122 if (Subtarget.hasStdExtZvkb())
5123 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5124 return V;
5125
5126 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5127 // be undef which can be handled with a single SLIDEDOWN/UP.
5128 int LoSrc, HiSrc;
5129 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5130 if (Rotation > 0) {
5131 SDValue LoV, HiV;
5132 if (LoSrc >= 0) {
5133 LoV = LoSrc == 0 ? V1 : V2;
5134 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5135 }
5136 if (HiSrc >= 0) {
5137 HiV = HiSrc == 0 ? V1 : V2;
5138 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5139 }
5140
5141 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5142 // to slide LoV up by (NumElts - Rotation).
5143 unsigned InvRotate = NumElts - Rotation;
5144
5145 SDValue Res = DAG.getUNDEF(ContainerVT);
5146 if (HiV) {
5147 // Even though we could use a smaller VL, don't, to avoid a vsetivli
5148 // toggle.
5149 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5150 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5151 }
5152 if (LoV)
5153 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5154 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5155 RISCVII::TAIL_AGNOSTIC);
5156
5157 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5158 }
5159
5160 // If this is a deinterleave and we can widen the vector, then we can use
5161 // vnsrl to deinterleave.
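// Rough illustration: viewing <8 x i8> as <4 x i16>, a vnsrl by 0 keeps the
// low byte of each i16 lane (the even elements), while a vnsrl by 8 keeps the
// high byte (the odd elements), so a single narrowing shift extracts either
// half of the deinterleave.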
5162 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5163 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5164 Subtarget, DAG);
5165 }
5166
5167 if (SDValue V =
5168 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5169 return V;
5170
5171 // Detect an interleave shuffle and lower to
5172 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
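// To see why this works, take 8-bit elements: vwaddu produces
// zext(Even) + zext(Odd) in 16 bits, and vmaccu then adds Odd * (2^8 - 1),
// for a total of zext(Even) + Odd * 2^8 -- Even in the low byte and Odd in
// the high byte of each 16-bit lane, i.e. the interleaved pair.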
5173 int EvenSrc, OddSrc;
5174 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5175 // Extract the halves of the vectors.
5176 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5177
5178 int Size = Mask.size();
5179 SDValue EvenV, OddV;
5180 assert(EvenSrc >= 0 && "Undef source?");
5181 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5182 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5183 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5184
5185 assert(OddSrc >= 0 && "Undef source?");
5186 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5187 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5188 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5189
5190 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5191 }
5192
5193
5194 // Handle any remaining single source shuffles
5195 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5196 if (V2.isUndef()) {
5197 // We might be able to express the shuffle as a bitrotate. But even if we
5198 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5199 // shifts and a vor will have a higher throughput than a vrgather.
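// E.g. the single-source byte shuffle <1,2,3,0> of a <4 x i8>, viewed as one
// i32 lane, is a rotate right by 8, so it can be expanded to roughly
// (or (srl x, 8), (shl x, 24)) rather than a vrgather.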
5200 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5201 return V;
5202
5203 if (VT.getScalarSizeInBits() == 8 &&
5204 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5205 // On such a vector we're unable to use i8 as the index type.
5206 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5207 // may involve vector splitting if we're already at LMUL=8, or our
5208 // user-supplied maximum fixed-length LMUL.
5209 return SDValue();
5210 }
5211
5212 // Base case for the two operand recursion below - handle the worst case
5213 // single source shuffle.
5214 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5215 MVT IndexVT = VT.changeTypeToInteger();
5216 // Since we can't introduce illegal index types at this stage, use i16 and
5217 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5218 // than XLenVT.
5219 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5220 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5221 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5222 }
5223
5224 // If the mask allows, we can do all the index computation in 16 bits. This
5225 // requires less work and less register pressure at high LMUL, and creates
5226 // smaller constants which may be cheaper to materialize.
5227 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5228 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5229 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5230 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5231 }
5232
5233 MVT IndexContainerVT =
5234 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5235
5236 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5237 SmallVector<SDValue> GatherIndicesLHS;
5238 for (int MaskIndex : Mask) {
5239 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5240 GatherIndicesLHS.push_back(IsLHSIndex
5241 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5242 : DAG.getUNDEF(XLenVT));
5243 }
5244 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5245 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5246 Subtarget);
5247 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5248 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5249 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5250 }
5251
5252 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5253 // merged with a second vrgather.
5254 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5255
5256 // Now construct the mask that will be used by the blended vrgather operation.
5257 // Construct the appropriate indices into each vector.
5258 for (int MaskIndex : Mask) {
5259 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5260 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5261 ? MaskIndex : -1);
5262 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5263 }
5264
5265 // Try to pick a profitable operand order.
5266 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5267 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5268
5269 // Recursively invoke lowering for each operand if we had two
5270 // independent single source shuffles, and then combine the result via a
5271 // vselect. Note that the vselect will likely be folded back into the
5272 // second permute (vrgather, or other) by the post-isel combine.
5273 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5274 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5275
5276 SmallVector<SDValue> MaskVals;
5277 for (int MaskIndex : Mask) {
5278 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5279 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5280 }
5281
5282 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5283 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5284 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5285
5286 if (SwapOps)
5287 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5288 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5289}
5290
5291 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5292 // Support splats for any type. These should type legalize well.
5293 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5294 return true;
5295
5296 // Only support legal VTs for other shuffles for now.
5297 if (!isTypeLegal(VT))
5298 return false;
5299
5300 MVT SVT = VT.getSimpleVT();
5301
5302 // Not for i1 vectors.
5303 if (SVT.getScalarType() == MVT::i1)
5304 return false;
5305
5306 int Dummy1, Dummy2;
5307 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5308 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5309}
5310
5311// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5312// the exponent.
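// Worked example for a 32-bit element x = 16 using f32: (float)16 has a biased
// exponent field of 131, so cttz(16) = 131 - 127 = 4 (16 == 2^4), and
// ctlz(16) = (127 + 31) - 131 = 27.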
5313SDValue
5314RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5315 SelectionDAG &DAG) const {
5316 MVT VT = Op.getSimpleValueType();
5317 unsigned EltSize = VT.getScalarSizeInBits();
5318 SDValue Src = Op.getOperand(0);
5319 SDLoc DL(Op);
5320 MVT ContainerVT = VT;
5321
5322 SDValue Mask, VL;
5323 if (Op->isVPOpcode()) {
5324 Mask = Op.getOperand(1);
5325 if (VT.isFixedLengthVector())
5326 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5327 Subtarget);
5328 VL = Op.getOperand(2);
5329 }
5330
5331 // We choose an FP type that can represent the value if possible. Otherwise,
5332 // we use a round-toward-zero conversion so that the exponent of the result is correct.
5333 // TODO: Use f16 for i8 when possible?
5334 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5335 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5336 FloatEltVT = MVT::f32;
5337 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5338
5339 // Legal types should have been checked in the RISCVTargetLowering
5340 // constructor.
5341 // TODO: Splitting may make sense in some cases.
5342 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5343 "Expected legal float type!");
5344
5345 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5346 // The trailing zero count is equal to log2 of this single bit value.
5347 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5348 SDValue Neg = DAG.getNegative(Src, DL, VT);
5349 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5350 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5351 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5352 Src, Mask, VL);
5353 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5354 }
5355
5356 // We have a legal FP type, convert to it.
5357 SDValue FloatVal;
5358 if (FloatVT.bitsGT(VT)) {
5359 if (Op->isVPOpcode())
5360 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5361 else
5362 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5363 } else {
5364 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5365 if (VT.isFixedLengthVector()) {
5366 ContainerVT = getContainerForFixedLengthVector(VT);
5367 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5368 }
5369 if (!Op->isVPOpcode())
5370 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5371 SDValue RTZRM =
5372 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5373 MVT ContainerFloatVT =
5374 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5375 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5376 Src, Mask, RTZRM, VL);
5377 if (VT.isFixedLengthVector())
5378 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5379 }
5380 // Bitcast to integer and shift the exponent to the LSB.
5381 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5382 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5383 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5384
5385 SDValue Exp;
5386 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5387 if (Op->isVPOpcode()) {
5388 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5389 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5390 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5391 } else {
5392 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5393 DAG.getConstant(ShiftAmt, DL, IntVT));
5394 if (IntVT.bitsLT(VT))
5395 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5396 else if (IntVT.bitsGT(VT))
5397 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5398 }
5399
5400 // The exponent contains log2 of the value in biased form.
5401 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5402 // For trailing zeros, we just need to subtract the bias.
5403 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5404 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5405 DAG.getConstant(ExponentBias, DL, VT));
5406 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5407 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5408 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5409
5410 // For leading zeros, we need to remove the bias and convert from log2 to
5411 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5412 unsigned Adjust = ExponentBias + (EltSize - 1);
5413 SDValue Res;
5414 if (Op->isVPOpcode())
5415 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5416 Mask, VL);
5417 else
5418 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5419
5420 // With a zero input, the result above equals Adjust, which is greater than
5421 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5422 if (Op.getOpcode() == ISD::CTLZ)
5423 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5424 else if (Op.getOpcode() == ISD::VP_CTLZ)
5425 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5426 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5427 return Res;
5428}
5429
5430SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5431 SelectionDAG &DAG) const {
5432 SDLoc DL(Op);
5433 MVT XLenVT = Subtarget.getXLenVT();
5434 SDValue Source = Op->getOperand(0);
5435 MVT SrcVT = Source.getSimpleValueType();
5436 SDValue Mask = Op->getOperand(1);
5437 SDValue EVL = Op->getOperand(2);
5438
5439 if (SrcVT.isFixedLengthVector()) {
5440 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5441 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5442 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5443 Subtarget);
5444 SrcVT = ContainerVT;
5445 }
5446
5447 // Convert to boolean vector.
5448 if (SrcVT.getScalarType() != MVT::i1) {
5449 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5450 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5451 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5452 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5453 DAG.getUNDEF(SrcVT), Mask, EVL});
5454 }
5455
5456 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5457 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5458 // In this case, we can interpret poison as -1, so nothing to do further.
5459 return Res;
5460
5461 // Convert -1 to VL.
5462 SDValue SetCC =
5463 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5464 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5465 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5466}
5467
5468// While RVV has alignment restrictions, we should always be able to load as a
5469// legal equivalently-sized byte-typed vector instead. This method is
5470// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5471// the load is already correctly-aligned, it returns SDValue().
5472SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5473 SelectionDAG &DAG) const {
5474 auto *Load = cast<LoadSDNode>(Op);
5475 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5476
5477 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5478 Load->getMemoryVT(),
5479 *Load->getMemOperand()))
5480 return SDValue();
5481
5482 SDLoc DL(Op);
5483 MVT VT = Op.getSimpleValueType();
5484 unsigned EltSizeBits = VT.getScalarSizeInBits();
5485 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5486 "Unexpected unaligned RVV load type");
5487 MVT NewVT =
5488 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5489 assert(NewVT.isValid() &&
5490 "Expecting equally-sized RVV vector types to be legal");
5491 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5492 Load->getPointerInfo(), Load->getOriginalAlign(),
5493 Load->getMemOperand()->getFlags());
5494 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5495}
5496
5497// While RVV has alignment restrictions, we should always be able to store as a
5498// legal equivalently-sized byte-typed vector instead. This method is
5499 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5500// returns SDValue() if the store is already correctly aligned.
5501SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5502 SelectionDAG &DAG) const {
5503 auto *Store = cast<StoreSDNode>(Op);
5504 assert(Store && Store->getValue().getValueType().isVector() &&
5505 "Expected vector store");
5506
5507 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5508 Store->getMemoryVT(),
5509 *Store->getMemOperand()))
5510 return SDValue();
5511
5512 SDLoc DL(Op);
5513 SDValue StoredVal = Store->getValue();
5514 MVT VT = StoredVal.getSimpleValueType();
5515 unsigned EltSizeBits = VT.getScalarSizeInBits();
5516 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5517 "Unexpected unaligned RVV store type");
5518 MVT NewVT =
5519 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5520 assert(NewVT.isValid() &&
5521 "Expecting equally-sized RVV vector types to be legal");
5522 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5523 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5524 Store->getPointerInfo(), Store->getOriginalAlign(),
5525 Store->getMemOperand()->getFlags());
5526}
5527
5528 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5529 const RISCVSubtarget &Subtarget) {
5530 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5531
5532 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5533
5534 // All simm32 constants should be handled by isel.
5535 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5536 // this check redundant, but small immediates are common so this check
5537 // should have better compile time.
5538 if (isInt<32>(Imm))
5539 return Op;
5540
5541 // We only need to cost the immediate, if constant pool lowering is enabled.
5542 if (!Subtarget.useConstantPoolForLargeInts())
5543 return Op;
5544
5545 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5546 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5547 return Op;
5548
5549 // Optimizations below are disabled for opt size. If we're optimizing for
5550 // size, use a constant pool.
5551 if (DAG.shouldOptForSize())
5552 return SDValue();
5553
5554 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
5555 // that if it will avoid a constant pool.
5556 // It will require an extra temporary register though.
5557 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5558 // low and high 32 bits are the same and bit 31 and 63 are set.
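// For instance, with the arbitrary example value Imm = 0xABCD1234ABCD1234:
// materialize the sign-extended 0xFFFFFFFFABCD1234 into X, then
// (ADD_UW X, (SLLI X, 32)) = (X << 32) + zext32(X) = 0xABCD1234ABCD1234,
// avoiding a constant pool access.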
5559 unsigned ShiftAmt, AddOpc;
5560 RISCVMatInt::InstSeq SeqLo =
5561 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5562 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5563 return Op;
5564
5565 return SDValue();
5566}
5567
5568 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5569 const RISCVSubtarget &Subtarget) {
5570 SDLoc dl(Op);
5571 AtomicOrdering FenceOrdering =
5572 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5573 SyncScope::ID FenceSSID =
5574 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5575
5576 if (Subtarget.hasStdExtZtso()) {
5577 // The only fence that needs an instruction is a sequentially-consistent
5578 // cross-thread fence.
5579 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5580 FenceSSID == SyncScope::System)
5581 return Op;
5582
5583 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5584 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5585 }
5586
5587 // singlethread fences only synchronize with signal handlers on the same
5588 // thread and thus only need to preserve instruction order, not actually
5589 // enforce memory ordering.
5590 if (FenceSSID == SyncScope::SingleThread)
5591 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5592 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5593
5594 return Op;
5595}
5596
5597SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5598 SelectionDAG &DAG) const {
5599 SDLoc DL(Op);
5600 MVT VT = Op.getSimpleValueType();
5601 MVT XLenVT = Subtarget.getXLenVT();
5602 unsigned Check = Op.getConstantOperandVal(1);
5603 unsigned TDCMask = 0;
5604 if (Check & fcSNan)
5605 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5606 if (Check & fcQNan)
5607 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5608 if (Check & fcPosInf)
5609 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5610 if (Check & fcNegInf)
5611 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5612 if (Check & fcPosNormal)
5613 TDCMask |= RISCV::FPMASK_Positive_Normal;
5614 if (Check & fcNegNormal)
5615 TDCMask |= RISCV::FPMASK_Negative_Normal;
5616 if (Check & fcPosSubnormal)
5617 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5618 if (Check & fcNegSubnormal)
5619 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5620 if (Check & fcPosZero)
5621 TDCMask |= RISCV::FPMASK_Positive_Zero;
5622 if (Check & fcNegZero)
5623 TDCMask |= RISCV::FPMASK_Negative_Zero;
5624
5625 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5626
5627 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5628
5629 if (VT.isVector()) {
5630 SDValue Op0 = Op.getOperand(0);
5631 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5632
5633 if (VT.isScalableVector()) {
5634 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5635 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5636 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5637 Mask = Op.getOperand(2);
5638 VL = Op.getOperand(3);
5639 }
5640 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5641 VL, Op->getFlags());
5642 if (IsOneBitMask)
5643 return DAG.getSetCC(DL, VT, FPCLASS,
5644 DAG.getConstant(TDCMask, DL, DstVT),
5645 ISD::SETEQ);
5646 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5647 DAG.getConstant(TDCMask, DL, DstVT));
5648 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5649 ISD::SETNE);
5650 }
5651
5652 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5653 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5654 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5655 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5656 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5657 Mask = Op.getOperand(2);
5658 MVT MaskContainerVT =
5659 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5660 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5661 VL = Op.getOperand(3);
5662 }
5663 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5664
5665 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5666 Mask, VL, Op->getFlags());
5667
5668 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5669 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5670 if (IsOneBitMask) {
5671 SDValue VMSEQ =
5672 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5673 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5674 DAG.getUNDEF(ContainerVT), Mask, VL});
5675 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5676 }
5677 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5678 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5679
5680 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5681 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5682 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5683
5684 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5685 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5686 DAG.getUNDEF(ContainerVT), Mask, VL});
5687 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5688 }
5689
5690 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5691 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5692 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5693 ISD::SETNE);
5694 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5695}
5696
5697// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5698// operations propagate nans.
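// For example, fmax.s(NaN, 2.0) returns 2.0, whereas llvm.maximum must return
// NaN. The lowering below therefore replaces the non-NaN operand with the NaN
// one first, so the machine fmin/fmax sees two NaNs and returns a NaN as
// required.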
5699 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5700 const RISCVSubtarget &Subtarget) {
5701 SDLoc DL(Op);
5702 MVT VT = Op.getSimpleValueType();
5703
5704 SDValue X = Op.getOperand(0);
5705 SDValue Y = Op.getOperand(1);
5706
5707 if (!VT.isVector()) {
5708 MVT XLenVT = Subtarget.getXLenVT();
5709
5710 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5711 // ensures that when one input is a nan, the other will also be a nan
5712 // allowing the nan to propagate. If both inputs are nan, this will swap the
5713 // inputs which is harmless.
5714
5715 SDValue NewY = Y;
5716 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5717 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5718 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5719 }
5720
5721 SDValue NewX = X;
5722 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5723 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5724 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5725 }
5726
5727 unsigned Opc =
5728 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5729 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5730 }
5731
5732 // Check for NaNs before converting the fixed-length vectors to scalable ones.
5733 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5734 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5735
5736 MVT ContainerVT = VT;
5737 if (VT.isFixedLengthVector()) {
5738 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5739 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5740 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5741 }
5742
5743 SDValue Mask, VL;
5744 if (Op->isVPOpcode()) {
5745 Mask = Op.getOperand(2);
5746 if (VT.isFixedLengthVector())
5747 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5748 Subtarget);
5749 VL = Op.getOperand(3);
5750 } else {
5751 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5752 }
5753
5754 SDValue NewY = Y;
5755 if (!XIsNeverNan) {
5756 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5757 {X, X, DAG.getCondCode(ISD::SETOEQ),
5758 DAG.getUNDEF(ContainerVT), Mask, VL});
5759 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5760 DAG.getUNDEF(ContainerVT), VL);
5761 }
5762
5763 SDValue NewX = X;
5764 if (!YIsNeverNan) {
5765 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5766 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5767 DAG.getUNDEF(ContainerVT), Mask, VL});
5768 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5769 DAG.getUNDEF(ContainerVT), VL);
5770 }
5771
5772 unsigned Opc =
5773 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5774 ? RISCVISD::VFMAX_VL
5775 : RISCVISD::VFMIN_VL;
5776 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5777 DAG.getUNDEF(ContainerVT), Mask, VL);
5778 if (VT.isFixedLengthVector())
5779 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5780 return Res;
5781}
5782
5783/// Get a RISC-V target specified VL op for a given SDNode.
5784static unsigned getRISCVVLOp(SDValue Op) {
5785#define OP_CASE(NODE) \
5786 case ISD::NODE: \
5787 return RISCVISD::NODE##_VL;
5788#define VP_CASE(NODE) \
5789 case ISD::VP_##NODE: \
5790 return RISCVISD::NODE##_VL;
5791 // clang-format off
5792 switch (Op.getOpcode()) {
5793 default:
5794 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5795 OP_CASE(ADD)
5796 OP_CASE(SUB)
5797 OP_CASE(MUL)
5798 OP_CASE(MULHS)
5799 OP_CASE(MULHU)
5800 OP_CASE(SDIV)
5801 OP_CASE(SREM)
5802 OP_CASE(UDIV)
5803 OP_CASE(UREM)
5804 OP_CASE(SHL)
5805 OP_CASE(SRA)
5806 OP_CASE(SRL)
5807 OP_CASE(ROTL)
5808 OP_CASE(ROTR)
5809 OP_CASE(BSWAP)
5810 OP_CASE(CTTZ)
5811 OP_CASE(CTLZ)
5812 OP_CASE(CTPOP)
5813 OP_CASE(BITREVERSE)
5814 OP_CASE(SADDSAT)
5815 OP_CASE(UADDSAT)
5816 OP_CASE(SSUBSAT)
5817 OP_CASE(USUBSAT)
5818 OP_CASE(AVGFLOORS)
5819 OP_CASE(AVGFLOORU)
5820 OP_CASE(AVGCEILS)
5821 OP_CASE(AVGCEILU)
5822 OP_CASE(FADD)
5823 OP_CASE(FSUB)
5824 OP_CASE(FMUL)
5825 OP_CASE(FDIV)
5826 OP_CASE(FNEG)
5827 OP_CASE(FABS)
5828 OP_CASE(FSQRT)
5829 OP_CASE(SMIN)
5830 OP_CASE(SMAX)
5831 OP_CASE(UMIN)
5832 OP_CASE(UMAX)
5833 OP_CASE(STRICT_FADD)
5834 OP_CASE(STRICT_FSUB)
5835 OP_CASE(STRICT_FMUL)
5836 OP_CASE(STRICT_FDIV)
5837 OP_CASE(STRICT_FSQRT)
5838 VP_CASE(ADD) // VP_ADD
5839 VP_CASE(SUB) // VP_SUB
5840 VP_CASE(MUL) // VP_MUL
5841 VP_CASE(SDIV) // VP_SDIV
5842 VP_CASE(SREM) // VP_SREM
5843 VP_CASE(UDIV) // VP_UDIV
5844 VP_CASE(UREM) // VP_UREM
5845 VP_CASE(SHL) // VP_SHL
5846 VP_CASE(FADD) // VP_FADD
5847 VP_CASE(FSUB) // VP_FSUB
5848 VP_CASE(FMUL) // VP_FMUL
5849 VP_CASE(FDIV) // VP_FDIV
5850 VP_CASE(FNEG) // VP_FNEG
5851 VP_CASE(FABS) // VP_FABS
5852 VP_CASE(SMIN) // VP_SMIN
5853 VP_CASE(SMAX) // VP_SMAX
5854 VP_CASE(UMIN) // VP_UMIN
5855 VP_CASE(UMAX) // VP_UMAX
5856 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5857 VP_CASE(SETCC) // VP_SETCC
5858 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5859 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5860 VP_CASE(BITREVERSE) // VP_BITREVERSE
5861 VP_CASE(SADDSAT) // VP_SADDSAT
5862 VP_CASE(UADDSAT) // VP_UADDSAT
5863 VP_CASE(SSUBSAT) // VP_SSUBSAT
5864 VP_CASE(USUBSAT) // VP_USUBSAT
5865 VP_CASE(BSWAP) // VP_BSWAP
5866 VP_CASE(CTLZ) // VP_CTLZ
5867 VP_CASE(CTTZ) // VP_CTTZ
5868 VP_CASE(CTPOP) // VP_CTPOP
5869 case ISD::CTLZ_ZERO_UNDEF:
5870 case ISD::VP_CTLZ_ZERO_UNDEF:
5871 return RISCVISD::CTLZ_VL;
5872 case ISD::CTTZ_ZERO_UNDEF:
5873 case ISD::VP_CTTZ_ZERO_UNDEF:
5874 return RISCVISD::CTTZ_VL;
5875 case ISD::FMA:
5876 case ISD::VP_FMA:
5877 return RISCVISD::VFMADD_VL;
5878 case ISD::STRICT_FMA:
5879 return RISCVISD::STRICT_VFMADD_VL;
5880 case ISD::AND:
5881 case ISD::VP_AND:
5882 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5883 return RISCVISD::VMAND_VL;
5884 return RISCVISD::AND_VL;
5885 case ISD::OR:
5886 case ISD::VP_OR:
5887 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5888 return RISCVISD::VMOR_VL;
5889 return RISCVISD::OR_VL;
5890 case ISD::XOR:
5891 case ISD::VP_XOR:
5892 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5893 return RISCVISD::VMXOR_VL;
5894 return RISCVISD::XOR_VL;
5895 case ISD::VP_SELECT:
5896 case ISD::VP_MERGE:
5897 return RISCVISD::VMERGE_VL;
5898 case ISD::VP_SRA:
5899 return RISCVISD::SRA_VL;
5900 case ISD::VP_SRL:
5901 return RISCVISD::SRL_VL;
5902 case ISD::VP_SQRT:
5903 return RISCVISD::FSQRT_VL;
5904 case ISD::VP_SIGN_EXTEND:
5905 return RISCVISD::VSEXT_VL;
5906 case ISD::VP_ZERO_EXTEND:
5907 return RISCVISD::VZEXT_VL;
5908 case ISD::VP_FP_TO_SINT:
5909 return RISCVISD::VFCVT_RTZ_X_F_VL;
5910 case ISD::VP_FP_TO_UINT:
5911 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5912 case ISD::FMINNUM:
5913 case ISD::VP_FMINNUM:
5914 return RISCVISD::VFMIN_VL;
5915 case ISD::FMAXNUM:
5916 case ISD::VP_FMAXNUM:
5917 return RISCVISD::VFMAX_VL;
5918 case ISD::LRINT:
5919 case ISD::VP_LRINT:
5920 case ISD::LLRINT:
5921 case ISD::VP_LLRINT:
5922 return RISCVISD::VFCVT_X_F_VL;
5923 }
5924 // clang-format on
5925#undef OP_CASE
5926#undef VP_CASE
5927}
5928
5929/// Return true if a RISC-V target specified op has a passthru operand.
5930static bool hasPassthruOp(unsigned Opcode) {
5931 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5933 "not a RISC-V target specific op");
5935 130 &&
5938 21 &&
5939 "adding target specific op should update this function");
5940 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5941 return true;
5942 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5943 return true;
5944 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5945 return true;
5946 if (Opcode == RISCVISD::SETCC_VL)
5947 return true;
5948 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5949 return true;
5950 if (Opcode == RISCVISD::VMERGE_VL)
5951 return true;
5952 return false;
5953}
5954
5955/// Return true if a RISC-V target specified op has a mask operand.
5956static bool hasMaskOp(unsigned Opcode) {
5957 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5959 "not a RISC-V target specific op");
5961 130 &&
5964 21 &&
5965 "adding target specific op should update this function");
5966 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5967 return true;
5968 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5969 return true;
5970 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5971 Opcode <= RISCVISD::STRICT_VFROUND_NOEXC_VL)
5972 return true;
5973 return false;
5974}
5975
5976 SDValue RISCVTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
5977 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5978 SDLoc DL(Op);
5979
5980 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5981 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5982
5983 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5984 if (!Op.getOperand(j).getValueType().isVector()) {
5985 LoOperands[j] = Op.getOperand(j);
5986 HiOperands[j] = Op.getOperand(j);
5987 continue;
5988 }
5989 std::tie(LoOperands[j], HiOperands[j]) =
5990 DAG.SplitVector(Op.getOperand(j), DL);
5991 }
5992
5993 SDValue LoRes =
5994 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5995 SDValue HiRes =
5996 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5997
5998 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5999}
6000
6002 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6003 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6004 SDLoc DL(Op);
6005
6006 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6007 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6008
6009 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6010 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6011 std::tie(LoOperands[j], HiOperands[j]) =
6012 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6013 continue;
6014 }
6015 if (!Op.getOperand(j).getValueType().isVector()) {
6016 LoOperands[j] = Op.getOperand(j);
6017 HiOperands[j] = Op.getOperand(j);
6018 continue;
6019 }
6020 std::tie(LoOperands[j], HiOperands[j]) =
6021 DAG.SplitVector(Op.getOperand(j), DL);
6022 }
6023
6024 SDValue LoRes =
6025 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6026 SDValue HiRes =
6027 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6028
6029 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6030}
6031
6032 SDValue RISCVTargetLowering::SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) const {
6033 SDLoc DL(Op);
6034
6035 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6036 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6037 auto [EVLLo, EVLHi] =
6038 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6039
6040 SDValue ResLo =
6041 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6042 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6043 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6044 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6045}
6046
6047 SDValue RISCVTargetLowering::SplitStrictFPVectorOp(SDValue Op,
6048 SelectionDAG &DAG) const {
6049 assert(Op->isStrictFPOpcode());
6050
6051 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6052
6053 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6054 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6055
6056 SDLoc DL(Op);
6057
6058 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6059 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6060
6061 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6062 if (!Op.getOperand(j).getValueType().isVector()) {
6063 LoOperands[j] = Op.getOperand(j);
6064 HiOperands[j] = Op.getOperand(j);
6065 continue;
6066 }
6067 std::tie(LoOperands[j], HiOperands[j]) =
6068 DAG.SplitVector(Op.getOperand(j), DL);
6069 }
6070
6071 SDValue LoRes =
6072 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6073 HiOperands[0] = LoRes.getValue(1);
6074 SDValue HiRes =
6075 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6076
6077 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6078 LoRes.getValue(0), HiRes.getValue(0));
6079 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6080}
6081
6082 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6083 SelectionDAG &DAG) const {
6084 switch (Op.getOpcode()) {
6085 default:
6086 report_fatal_error("unimplemented operand");
6087 case ISD::ATOMIC_FENCE:
6088 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6089 case ISD::GlobalAddress:
6090 return lowerGlobalAddress(Op, DAG);
6091 case ISD::BlockAddress:
6092 return lowerBlockAddress(Op, DAG);
6093 case ISD::ConstantPool:
6094 return lowerConstantPool(Op, DAG);
6095 case ISD::JumpTable:
6096 return lowerJumpTable(Op, DAG);
6097 case ISD::GlobalTLSAddress:
6098 return lowerGlobalTLSAddress(Op, DAG);
6099 case ISD::Constant:
6100 return lowerConstant(Op, DAG, Subtarget);
6101 case ISD::SELECT:
6102 return lowerSELECT(Op, DAG);
6103 case ISD::BRCOND:
6104 return lowerBRCOND(Op, DAG);
6105 case ISD::VASTART:
6106 return lowerVASTART(Op, DAG);
6107 case ISD::FRAMEADDR:
6108 return lowerFRAMEADDR(Op, DAG);
6109 case ISD::RETURNADDR:
6110 return lowerRETURNADDR(Op, DAG);
6111 case ISD::SHL_PARTS:
6112 return lowerShiftLeftParts(Op, DAG);
6113 case ISD::SRA_PARTS:
6114 return lowerShiftRightParts(Op, DAG, true);
6115 case ISD::SRL_PARTS:
6116 return lowerShiftRightParts(Op, DAG, false);
6117 case ISD::ROTL:
6118 case ISD::ROTR:
6119 if (Op.getValueType().isFixedLengthVector()) {
6120 assert(Subtarget.hasStdExtZvkb());
6121 return lowerToScalableOp(Op, DAG);
6122 }
6123 assert(Subtarget.hasVendorXTHeadBb() &&
6124 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6125 "Unexpected custom legalization");
6126 // XTHeadBb only supports rotate by constant.
6127 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6128 return SDValue();
6129 return Op;
6130 case ISD::BITCAST: {
6131 SDLoc DL(Op);
6132 EVT VT = Op.getValueType();
6133 SDValue Op0 = Op.getOperand(0);
6134 EVT Op0VT = Op0.getValueType();
6135 MVT XLenVT = Subtarget.getXLenVT();
6136 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6137 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6138 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6139 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6140 return FPConv;
6141 }
6142 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6143 Subtarget.hasStdExtZfbfmin()) {
6144 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6145 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6146 return FPConv;
6147 }
6148 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6149 Subtarget.hasStdExtFOrZfinx()) {
6150 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6151 SDValue FPConv =
6152 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6153 return FPConv;
6154 }
6155 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6156 SDValue Lo, Hi;
6157 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6158 SDValue RetReg =
6159 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6160 return RetReg;
6161 }
6162
6163 // Consider other scalar<->scalar casts as legal if the types are legal.
6164 // Otherwise expand them.
6165 if (!VT.isVector() && !Op0VT.isVector()) {
6166 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6167 return Op;
6168 return SDValue();
6169 }
6170
6171 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6172 "Unexpected types");
6173
6174 if (VT.isFixedLengthVector()) {
6175 // We can handle fixed length vector bitcasts with a simple replacement
6176 // in isel.
6177 if (Op0VT.isFixedLengthVector())
6178 return Op;
6179 // When bitcasting from scalar to fixed-length vector, insert the scalar
6180 // into a one-element vector of the result type, and perform a vector
6181 // bitcast.
6182 if (!Op0VT.isVector()) {
6183 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6184 if (!isTypeLegal(BVT))
6185 return SDValue();
6186 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6187 DAG.getUNDEF(BVT), Op0,
6188 DAG.getVectorIdxConstant(0, DL)));
6189 }
6190 return SDValue();
6191 }
6192 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6193 // thus: bitcast the vector to a one-element vector type whose element type
6194 // is the same as the result type, and extract the first element.
6195 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6196 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6197 if (!isTypeLegal(BVT))
6198 return SDValue();
6199 SDValue BVec = DAG.getBitcast(BVT, Op0);
6200 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6201 DAG.getVectorIdxConstant(0, DL));
6202 }
6203 return SDValue();
6204 }
6205 case ISD::INTRINSIC_WO_CHAIN:
6206 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6207 case ISD::INTRINSIC_W_CHAIN:
6208 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6209 case ISD::INTRINSIC_VOID:
6210 return LowerINTRINSIC_VOID(Op, DAG);
6211 case ISD::IS_FPCLASS:
6212 return LowerIS_FPCLASS(Op, DAG);
6213 case ISD::BITREVERSE: {
6214 MVT VT = Op.getSimpleValueType();
6215 if (VT.isFixedLengthVector()) {
6216 assert(Subtarget.hasStdExtZvbb());
6217 return lowerToScalableOp(Op, DAG);
6218 }
6219 SDLoc DL(Op);
6220 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6221 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6222 // Expand bitreverse to a bswap(rev8) followed by brev8.
6223 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6224 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6225 }
6226 case ISD::TRUNCATE:
6227 // Only custom-lower vector truncates
6228 if (!Op.getSimpleValueType().isVector())
6229 return Op;
6230 return lowerVectorTruncLike(Op, DAG);
6231 case ISD::ANY_EXTEND:
6232 case ISD::ZERO_EXTEND:
6233 if (Op.getOperand(0).getValueType().isVector() &&
6234 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6235 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6236 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6237 case ISD::SIGN_EXTEND:
6238 if (Op.getOperand(0).getValueType().isVector() &&
6239 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6240 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6241 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6242 case ISD::SPLAT_VECTOR_PARTS:
6243 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6244 case ISD::INSERT_VECTOR_ELT:
6245 return lowerINSERT_VECTOR_ELT(Op, DAG);
6246 case ISD::EXTRACT_VECTOR_ELT:
6247 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6248 case ISD::SCALAR_TO_VECTOR: {
6249 MVT VT = Op.getSimpleValueType();
6250 SDLoc DL(Op);
6251 SDValue Scalar = Op.getOperand(0);
6252 if (VT.getVectorElementType() == MVT::i1) {
6253 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6254 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6255 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6256 }
6257 MVT ContainerVT = VT;
6258 if (VT.isFixedLengthVector())
6259 ContainerVT = getContainerForFixedLengthVector(VT);
6260 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6261 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6262 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6263 DAG.getUNDEF(ContainerVT), Scalar, VL);
6264 if (VT.isFixedLengthVector())
6265 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6266 return V;
6267 }
6268 case ISD::VSCALE: {
6269 MVT XLenVT = Subtarget.getXLenVT();
6270 MVT VT = Op.getSimpleValueType();
6271 SDLoc DL(Op);
6272 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6273 // We define our scalable vector types for lmul=1 to use a 64 bit known
6274 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6275 // vscale as VLENB / 8.
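// For example, with VLEN=128: VLENB = 16, vscale = 16 / 8 = 2, and
// <vscale x 2 x i32> therefore holds 4 i32 elements.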
6276 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6277 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6278 report_fatal_error("Support for VLEN==32 is incomplete.");
6279 // We assume VLENB is a multiple of 8. We manually choose the best shift
6280 // here because SimplifyDemandedBits isn't always able to simplify it.
6281 uint64_t Val = Op.getConstantOperandVal(0);
6282 if (isPowerOf2_64(Val)) {
6283 uint64_t Log2 = Log2_64(Val);
6284 if (Log2 < 3)
6285 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6286 DAG.getConstant(3 - Log2, DL, VT));
6287 else if (Log2 > 3)
6288 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6289 DAG.getConstant(Log2 - 3, DL, XLenVT));
6290 } else if ((Val % 8) == 0) {
6291 // If the multiplier is a multiple of 8, scale it down to avoid needing
6292 // to shift the VLENB value.
6293 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6294 DAG.getConstant(Val / 8, DL, XLenVT));
6295 } else {
6296 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6297 DAG.getConstant(3, DL, XLenVT));
6298 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6299 DAG.getConstant(Val, DL, XLenVT));
6300 }
6301 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6302 }
6303 case ISD::FPOWI: {
6304 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6305 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6306 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6307 Op.getOperand(1).getValueType() == MVT::i32) {
6308 SDLoc DL(Op);
6309 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6310 SDValue Powi =
6311 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6312 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6313 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6314 }
6315 return SDValue();
6316 }
6317 case ISD::FMAXIMUM:
6318 case ISD::FMINIMUM:
6319 if (Op.getValueType() == MVT::nxv32f16 &&
6320 (Subtarget.hasVInstructionsF16Minimal() &&
6321 !Subtarget.hasVInstructionsF16()))
6322 return SplitVectorOp(Op, DAG);
6323 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6324 case ISD::FP_EXTEND: {
6325 SDLoc DL(Op);
6326 EVT VT = Op.getValueType();
6327 SDValue Op0 = Op.getOperand(0);
6328 EVT Op0VT = Op0.getValueType();
6329 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6330 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6331 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6332 SDValue FloatVal =
6333 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6334 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6335 }
6336
6337 if (!Op.getValueType().isVector())
6338 return Op;
6339 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6340 }
6341 case ISD::FP_ROUND: {
6342 SDLoc DL(Op);
6343 EVT VT = Op.getValueType();
6344 SDValue Op0 = Op.getOperand(0);
6345 EVT Op0VT = Op0.getValueType();
6346 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6347 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6348 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6349 Subtarget.hasStdExtDOrZdinx()) {
6350 SDValue FloatVal =
6351 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6352 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6353 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6354 }
6355
6356 if (!Op.getValueType().isVector())
6357 return Op;
6358 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6359 }
6360 case ISD::STRICT_FP_ROUND:
6361 case ISD::STRICT_FP_EXTEND:
6362 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6363 case ISD::SINT_TO_FP:
6364 case ISD::UINT_TO_FP:
6365 if (Op.getValueType().isVector() &&
6366 Op.getValueType().getScalarType() == MVT::f16 &&
6367 (Subtarget.hasVInstructionsF16Minimal() &&
6368 !Subtarget.hasVInstructionsF16())) {
6369 if (Op.getValueType() == MVT::nxv32f16)
6370 return SplitVectorOp(Op, DAG);
6371 // int -> f32
6372 SDLoc DL(Op);
6373 MVT NVT =
6374 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6375 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6376 // f32 -> f16
6377 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6378 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6379 }
6380 [[fallthrough]];
6381 case ISD::FP_TO_SINT:
6382 case ISD::FP_TO_UINT:
6383 if (SDValue Op1 = Op.getOperand(0);
6384 Op1.getValueType().isVector() &&
6385 Op1.getValueType().getScalarType() == MVT::f16 &&
6386 (Subtarget.hasVInstructionsF16Minimal() &&
6387 !Subtarget.hasVInstructionsF16())) {
6388 if (Op1.getValueType() == MVT::nxv32f16)
6389 return SplitVectorOp(Op, DAG);
6390 // f16 -> f32
6391 SDLoc DL(Op);
6392 MVT NVT = MVT::getVectorVT(MVT::f32,
6393 Op1.getValueType().getVectorElementCount());
6394 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6395 // f32 -> int
6396 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6397 }
6398 [[fallthrough]];
6399 case ISD::STRICT_FP_TO_SINT:
6400 case ISD::STRICT_FP_TO_UINT:
6401 case ISD::STRICT_SINT_TO_FP:
6402 case ISD::STRICT_UINT_TO_FP: {
6403 // RVV can only do fp<->int conversions to types half/double the size as
6404 // the source. We custom-lower any conversions that do two hops into
6405 // sequences.
6406 MVT VT = Op.getSimpleValueType();
6407 if (!VT.isVector())
6408 return Op;
6409 SDLoc DL(Op);
6410 bool IsStrict = Op->isStrictFPOpcode();
6411 SDValue Src = Op.getOperand(0 + IsStrict);
6412 MVT EltVT = VT.getVectorElementType();
6413 MVT SrcVT = Src.getSimpleValueType();
6414 MVT SrcEltVT = SrcVT.getVectorElementType();
6415 unsigned EltSize = EltVT.getSizeInBits();
6416 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6417 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6418 "Unexpected vector element types");
6419
6420 bool IsInt2FP = SrcEltVT.isInteger();
6421 // Widening conversions
6422 if (EltSize > (2 * SrcEltSize)) {
6423 if (IsInt2FP) {
6424 // Do a regular integer sign/zero extension then convert to float.
6425 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6426 VT.getVectorElementCount());
6427 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6428 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6429 ? ISD::ZERO_EXTEND
6430 : ISD::SIGN_EXTEND;
6431 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6432 if (IsStrict)
6433 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6434 Op.getOperand(0), Ext);
6435 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6436 }
6437 // FP2Int
6438 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6439 // Do one doubling fp_extend then complete the operation by converting
6440 // to int.
6441 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6442 if (IsStrict) {
6443 auto [FExt, Chain] =
6444 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6445 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6446 }
6447 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6448 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6449 }
6450
6451 // Narrowing conversions
6452 if (SrcEltSize > (2 * EltSize)) {
6453 if (IsInt2FP) {
6454 // One narrowing int_to_fp, then an fp_round.
6455 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6456 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6457 if (IsStrict) {
6458 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6459 DAG.getVTList(InterimFVT, MVT::Other),
6460 Op.getOperand(0), Src);
6461 SDValue Chain = Int2FP.getValue(1);
6462 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6463 }
6464 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6465 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6466 }
6467 // FP2Int
6468 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6469 // representable by the integer, the result is poison.
6470 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6471 VT.getVectorElementCount());
6472 if (IsStrict) {
6473 SDValue FP2Int =
6474 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6475 Op.getOperand(0), Src);
6476 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6477 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6478 }
6479 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6480 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6481 }
6482
6483 // Scalable vectors can exit here. Patterns will handle equally-sized
6484 // conversions halving/doubling ones.
6485 if (!VT.isFixedLengthVector())
6486 return Op;
6487
6488 // For fixed-length vectors we lower to a custom "VL" node.
6489 unsigned RVVOpc = 0;
6490 switch (Op.getOpcode()) {
6491 default:
6492 llvm_unreachable("Impossible opcode");
6493 case ISD::FP_TO_SINT:
6494 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6495 break;
6496 case ISD::FP_TO_UINT:
6497 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6498 break;
6499 case ISD::SINT_TO_FP:
6500 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6501 break;
6502 case ISD::UINT_TO_FP:
6503 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6504 break;
6505 case ISD::STRICT_FP_TO_SINT:
6506 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6507 break;
6508 case ISD::STRICT_FP_TO_UINT:
6509 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6510 break;
6511 case ISD::STRICT_SINT_TO_FP:
6512 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6513 break;
6514 case ISD::STRICT_UINT_TO_FP:
6515 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6516 break;
6517 }
6518
6519 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6520 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6521 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6522 "Expected same element count");
6523
6524 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6525
6526 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6527 if (IsStrict) {
6528 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6529 Op.getOperand(0), Src, Mask, VL);
6530 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6531 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6532 }
6533 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6534 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6535 }
6536 case ISD::FP_TO_SINT_SAT:
6537 case ISD::FP_TO_UINT_SAT:
6538 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6539 case ISD::FP_TO_BF16: {
6540 // Custom lower to ensure the libcall return is passed in an FPR on hard
6541 // float ABIs.
6542 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6543 SDLoc DL(Op);
6544 MakeLibCallOptions CallOptions;
6545 RTLIB::Libcall LC =
6546 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6547 SDValue Res =
6548 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6549 if (Subtarget.is64Bit())
6550 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6551 return DAG.getBitcast(MVT::i32, Res);
6552 }
6553 case ISD::BF16_TO_FP: {
6554 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6555 MVT VT = Op.getSimpleValueType();
6556 SDLoc DL(Op);
6557 Op = DAG.getNode(
6558 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6559 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6560 SDValue Res = Subtarget.is64Bit()
6561 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6562 : DAG.getBitcast(MVT::f32, Op);
6563 // fp_extend if the target VT is bigger than f32.
6564 if (VT != MVT::f32)
6565 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6566 return Res;
6567 }
6568 case ISD::FP_TO_FP16: {
6569 // Custom lower to ensure the libcall return is passed in an FPR on hard
6570 // float ABIs.
6571 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6572 SDLoc DL(Op);
6573 MakeLibCallOptions CallOptions;
6574 RTLIB::Libcall LC =
6575 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6576 SDValue Res =
6577 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6578 if (Subtarget.is64Bit())
6579 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6580 return DAG.getBitcast(MVT::i32, Res);
6581 }
6582 case ISD::FP16_TO_FP: {
6583 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6584 // float ABIs.
6585 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6586 SDLoc DL(Op);
6587 MakeLibCallOptions CallOptions;
6588 SDValue Arg = Subtarget.is64Bit()
6589 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6590 Op.getOperand(0))
6591 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6592 SDValue Res =
6593 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6594 .first;
6595 return Res;
6596 }
6597 case ISD::FTRUNC:
6598 case ISD::FCEIL:
6599 case ISD::FFLOOR:
6600 case ISD::FNEARBYINT:
6601 case ISD::FRINT:
6602 case ISD::FROUND:
6603 case ISD::FROUNDEVEN:
6604 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6605 case ISD::LRINT:
6606 case ISD::LLRINT:
6607 return lowerVectorXRINT(Op, DAG, Subtarget);
6608 case ISD::VECREDUCE_ADD:
6609 case ISD::VECREDUCE_UMAX:
6610 case ISD::VECREDUCE_SMAX:
6611 case ISD::VECREDUCE_UMIN:
6612 case ISD::VECREDUCE_SMIN:
6613 return lowerVECREDUCE(Op, DAG);
6614 case ISD::VECREDUCE_AND:
6615 case ISD::VECREDUCE_OR:
6616 case ISD::VECREDUCE_XOR:
6617 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6618 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6619 return lowerVECREDUCE(Op, DAG);
6620 case ISD::VECREDUCE_FADD:
6621 case ISD::VECREDUCE_SEQ_FADD:
6622 case ISD::VECREDUCE_FMIN:
6623 case ISD::VECREDUCE_FMAX:
6624 case ISD::VECREDUCE_FMAXIMUM:
6625 case ISD::VECREDUCE_FMINIMUM:
6626 return lowerFPVECREDUCE(Op, DAG);
6627 case ISD::VP_REDUCE_ADD:
6628 case ISD::VP_REDUCE_UMAX:
6629 case ISD::VP_REDUCE_SMAX:
6630 case ISD::VP_REDUCE_UMIN:
6631 case ISD::VP_REDUCE_SMIN:
6632 case ISD::VP_REDUCE_FADD:
6633 case ISD::VP_REDUCE_SEQ_FADD:
6634 case ISD::VP_REDUCE_FMIN:
6635 case ISD::VP_REDUCE_FMAX:
6636 case ISD::VP_REDUCE_FMINIMUM:
6637 case ISD::VP_REDUCE_FMAXIMUM:
6638 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6639 (Subtarget.hasVInstructionsF16Minimal() &&
6640 !Subtarget.hasVInstructionsF16()))
6641 return SplitVectorReductionOp(Op, DAG);
6642 return lowerVPREDUCE(Op, DAG);
6643 case ISD::VP_REDUCE_AND:
6644 case ISD::VP_REDUCE_OR:
6645 case ISD::VP_REDUCE_XOR:
6646 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6647 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6648 return lowerVPREDUCE(Op, DAG);
6649 case ISD::VP_CTTZ_ELTS:
6650 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6651 return lowerVPCttzElements(Op, DAG);
6652 case ISD::UNDEF: {
6653 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6654 return convertFromScalableVector(Op.getSimpleValueType(),
6655 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6656 }
6657 case ISD::INSERT_SUBVECTOR:
6658 return lowerINSERT_SUBVECTOR(Op, DAG);
6659 case ISD::EXTRACT_SUBVECTOR:
6660 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6661 case ISD::VECTOR_DEINTERLEAVE:
6662 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6663 case ISD::VECTOR_INTERLEAVE:
6664 return lowerVECTOR_INTERLEAVE(Op, DAG);
6665 case ISD::STEP_VECTOR:
6666 return lowerSTEP_VECTOR(Op, DAG);
6667 case ISD::VECTOR_REVERSE:
6668 return lowerVECTOR_REVERSE(Op, DAG);
6669 case ISD::VECTOR_SPLICE:
6670 return lowerVECTOR_SPLICE(Op, DAG);
6671 case ISD::BUILD_VECTOR:
6672 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6673 case ISD::SPLAT_VECTOR:
6674 if ((Op.getValueType().getScalarType() == MVT::f16 &&
6675 (Subtarget.hasVInstructionsF16Minimal() &&
6676 Subtarget.hasStdExtZfhminOrZhinxmin() &&
6677 !Subtarget.hasVInstructionsF16())) ||
6678 (Op.getValueType().getScalarType() == MVT::bf16 &&
6679 (Subtarget.hasVInstructionsBF16Minimal() &&
6680 Subtarget.hasStdExtZfbfmin()))) {
6681 if (Op.getValueType() == MVT::nxv32f16 ||
6682 Op.getValueType() == MVT::nxv32bf16)
6683 return SplitVectorOp(Op, DAG);
6684 SDLoc DL(Op);
6685 SDValue NewScalar =
6686 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6687 SDValue NewSplat = DAG.getNode(
6688 ISD::SPLAT_VECTOR, DL,
6689 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6690 NewScalar);
6691 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6692 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6693 }
6694 if (Op.getValueType().getVectorElementType() == MVT::i1)
6695 return lowerVectorMaskSplat(Op, DAG);
6696 return SDValue();
6697 case ISD::VECTOR_SHUFFLE:
6698 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6699 case ISD::CONCAT_VECTORS: {
6700 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6701 // better than going through the stack, as the default expansion does.
6702 SDLoc DL(Op);
6703 MVT VT = Op.getSimpleValueType();
6704 MVT ContainerVT = VT;
6705 if (VT.isFixedLengthVector())
6706 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6707
6708 // Recursively split concat_vectors with more than 2 operands:
6709 //
6710 // concat_vector op1, op2, op3, op4
6711 // ->
6712 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6713 //
6714 // This reduces the length of the chain of vslideups and allows us to
6715 // perform the vslideups at a smaller LMUL, limited to MF2.
6716 if (Op.getNumOperands() > 2 &&
6717 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6718 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6719 assert(isPowerOf2_32(Op.getNumOperands()));
6720 size_t HalfNumOps = Op.getNumOperands() / 2;
6721 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6722 Op->ops().take_front(HalfNumOps));
6723 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6724 Op->ops().drop_front(HalfNumOps));
6725 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6726 }
6727
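// Otherwise lower directly to a chain of INSERT_SUBVECTORs, e.g.
// (concat_vectors a, b)
// -> (insert_subvector (insert_subvector undef, a, 0), b, NumOpElts).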
6728 unsigned NumOpElts =
6729 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6730 SDValue Vec = DAG.getUNDEF(VT);
6731 for (const auto &OpIdx : enumerate(Op->ops())) {
6732 SDValue SubVec = OpIdx.value();
6733 // Don't insert undef subvectors.
6734 if (SubVec.isUndef())
6735 continue;
6736 Vec =
6737 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6738 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6739 }
6740 return Vec;
6741 }
6742 case ISD::LOAD:
6743 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6744 return V;
6745 if (Op.getValueType().isFixedLengthVector())
6746 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6747 return Op;
6748 case ISD::STORE:
6749 if (auto V = expandUnalignedRVVStore(Op, DAG))
6750 return V;
6751 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6752 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6753 return Op;
6754 case ISD::MLOAD:
6755 case ISD::VP_LOAD:
6756 return lowerMaskedLoad(Op, DAG);
6757 case ISD::MSTORE:
6758 case ISD::VP_STORE:
6759 return lowerMaskedStore(Op, DAG);
6760 case ISD::SELECT_CC: {
6761 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6762 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6763 // into separate SETCC+SELECT just like LegalizeDAG.
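// i.e. (select_cc lhs, rhs, cc, t, f) -> (select (setcc lhs, rhs, cc), t, f).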
6764 SDValue Tmp1 = Op.getOperand(0);
6765 SDValue Tmp2 = Op.getOperand(1);
6766 SDValue True = Op.getOperand(2);
6767 SDValue False = Op.getOperand(3);
6768 EVT VT = Op.getValueType();
6769 SDValue CC = Op.getOperand(4);
6770 EVT CmpVT = Tmp1.getValueType();
6771 EVT CCVT =
6772 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6773 SDLoc DL(Op);
6774 SDValue Cond =
6775 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6776 return DAG.getSelect(DL, VT, Cond, True, False);
6777 }
6778 case ISD::SETCC: {
6779 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6780 if (OpVT.isScalarInteger()) {
6781 MVT VT = Op.getSimpleValueType();
6782 SDValue LHS = Op.getOperand(0);
6783 SDValue RHS = Op.getOperand(1);
6784 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6785 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6786 "Unexpected CondCode");
6787
6788 SDLoc DL(Op);
6789
6790 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6791 // convert this to the equivalent of (set(u)ge X, C+1) by using
6792 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6793 // in a register.
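// For example, (setgt X, 5) becomes (xori (slti X, 6), 1) and
// (setugt X, 7) becomes (xori (sltiu X, 8), 1).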
6794 if (isa<ConstantSDNode>(RHS)) {
6795 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6796 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6797 // If this is an unsigned compare and the constant is -1, incrementing
6798 // the constant would change behavior. The result should be false.
6799 if (CCVal == ISD::SETUGT && Imm == -1)
6800 return DAG.getConstant(0, DL, VT);
6801 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6802 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6803 SDValue SetCC = DAG.getSetCC(
6804 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6805 return DAG.getLogicalNOT(DL, SetCC, VT);
6806 }
6807 }
6808
6809 // Not a constant we could handle, swap the operands and condition code to
6810 // SETLT/SETULT.
6811 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6812 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6813 }
6814
6815 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6816 (Subtarget.hasVInstructionsF16Minimal() &&
6817 !Subtarget.hasVInstructionsF16()))
6818 return SplitVectorOp(Op, DAG);
6819
6820 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6821 }
6822 case ISD::ADD:
6823 case ISD::SUB:
6824 case ISD::MUL:
6825 case ISD::MULHS:
6826 case ISD::MULHU:
6827 case ISD::AND:
6828 case ISD::OR:
6829 case ISD::XOR:
6830 case ISD::SDIV:
6831 case ISD::SREM:
6832 case ISD::UDIV:
6833 case ISD::UREM:
6834 case ISD::BSWAP:
6835 case ISD::CTPOP:
6836 return lowerToScalableOp(Op, DAG);
6837 case ISD::SHL:
6838 case ISD::SRA:
6839 case ISD::SRL:
6840 if (Op.getSimpleValueType().isFixedLengthVector())
6841 return lowerToScalableOp(Op, DAG);
6842 // This can be called for an i32 shift amount that needs to be promoted.
6843 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6844 "Unexpected custom legalisation");
6845 return SDValue();
6846 case ISD::FADD:
6847 case ISD::FSUB:
6848 case ISD::FMUL:
6849 case ISD::FDIV:
6850 case ISD::FNEG:
6851 case ISD::FABS:
6852 case ISD::FSQRT:
6853 case ISD::FMA:
6854 case ISD::FMINNUM:
6855 case ISD::FMAXNUM:
6856 if (Op.getValueType() == MVT::nxv32f16 &&
6857 (Subtarget.hasVInstructionsF16Minimal() &&
6858 !Subtarget.hasVInstructionsF16()))
6859 return SplitVectorOp(Op, DAG);
6860 [[fallthrough]];
6861 case ISD::AVGFLOORS:
6862 case ISD::AVGFLOORU:
6863 case ISD::AVGCEILS:
6864 case ISD::AVGCEILU:
6865 case ISD::SMIN:
6866 case ISD::SMAX:
6867 case ISD::UMIN:
6868 case ISD::UMAX:
6869 return lowerToScalableOp(Op, DAG);
6870 case ISD::UADDSAT:
6871 case ISD::USUBSAT:
6872 return lowerToScalableOp(Op, DAG);
6873 case ISD::SADDSAT:
6874 case ISD::SSUBSAT:
6875 return lowerToScalableOp(Op, DAG);
6876 case ISD::ABDS:
6877 case ISD::ABDU: {
6878 SDLoc dl(Op);
6879 EVT VT = Op->getValueType(0);
6880 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6881 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6882 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6883
6884 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6885 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
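// e.g. abds(-3, 5) = smax(-3, 5) - smin(-3, 5) = 5 - (-3) = 8.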
6886 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6887 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6888 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6889 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6890 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6891 }
6892 case ISD::ABS:
6893 case ISD::VP_ABS:
6894 return lowerABS(Op, DAG);
6895 case ISD::CTLZ:
6896 case ISD::CTLZ_ZERO_UNDEF:
6897 case ISD::CTTZ:
6898 case ISD::CTTZ_ZERO_UNDEF:
6899 if (Subtarget.hasStdExtZvbb())
6900 return lowerToScalableOp(Op, DAG);
6901 assert(Op.getOpcode() != ISD::CTTZ);
6902 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6903 case ISD::VSELECT:
6904 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6905 case ISD::FCOPYSIGN:
6906 if (Op.getValueType() == MVT::nxv32f16 &&
6907 (Subtarget.hasVInstructionsF16Minimal() &&
6908 !Subtarget.hasVInstructionsF16()))
6909 return SplitVectorOp(Op, DAG);
6910 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6911 case ISD::STRICT_FADD:
6912 case ISD::STRICT_FSUB:
6913 case ISD::STRICT_FMUL:
6914 case ISD::STRICT_FDIV:
6915 case ISD::STRICT_FSQRT:
6916 case ISD::STRICT_FMA:
6917 if (Op.getValueType() == MVT::nxv32f16 &&
6918 (Subtarget.hasVInstructionsF16Minimal() &&
6919 !Subtarget.hasVInstructionsF16()))
6920 return SplitStrictFPVectorOp(Op, DAG);
6921 return lowerToScalableOp(Op, DAG);
6922 case ISD::STRICT_FSETCC:
6923 case ISD::STRICT_FSETCCS:
6924 return lowerVectorStrictFSetcc(Op, DAG);
6925 case ISD::STRICT_FCEIL:
6926 case ISD::STRICT_FRINT:
6927 case ISD::STRICT_FFLOOR:
6928 case ISD::STRICT_FTRUNC:
6929 case ISD::STRICT_FNEARBYINT:
6930 case ISD::STRICT_FROUND:
6931 case ISD::STRICT_FROUNDEVEN:
6932 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6933 case ISD::MGATHER:
6934 case ISD::VP_GATHER:
6935 return lowerMaskedGather(Op, DAG);
6936 case ISD::MSCATTER:
6937 case ISD::VP_SCATTER:
6938 return lowerMaskedScatter(Op, DAG);
6939 case ISD::GET_ROUNDING:
6940 return lowerGET_ROUNDING(Op, DAG);
6941 case ISD::SET_ROUNDING:
6942 return lowerSET_ROUNDING(Op, DAG);
6943 case ISD::EH_DWARF_CFA:
6944 return lowerEH_DWARF_CFA(Op, DAG);
6945 case ISD::VP_SELECT:
6946 case ISD::VP_MERGE:
6947 case ISD::VP_ADD:
6948 case ISD::VP_SUB:
6949 case ISD::VP_MUL:
6950 case ISD::VP_SDIV:
6951 case ISD::VP_UDIV:
6952 case ISD::VP_SREM:
6953 case ISD::VP_UREM:
6954 case ISD::VP_UADDSAT:
6955 case ISD::VP_USUBSAT:
6956 case ISD::VP_SADDSAT:
6957 case ISD::VP_SSUBSAT:
6958 case ISD::VP_LRINT:
6959 case ISD::VP_LLRINT:
6960 return lowerVPOp(Op, DAG);
6961 case ISD::VP_AND:
6962 case ISD::VP_OR:
6963 case ISD::VP_XOR:
6964 return lowerLogicVPOp(Op, DAG);
6965 case ISD::VP_FADD:
6966 case ISD::VP_FSUB:
6967 case ISD::VP_FMUL:
6968 case ISD::VP_FDIV:
6969 case ISD::VP_FNEG:
6970 case ISD::VP_FABS:
6971 case ISD::VP_SQRT:
6972 case ISD::VP_FMA:
6973 case ISD::VP_FMINNUM:
6974 case ISD::VP_FMAXNUM:
6975 case ISD::VP_FCOPYSIGN:
6976 if (Op.getValueType() == MVT::nxv32f16 &&
6977 (Subtarget.hasVInstructionsF16Minimal() &&
6978 !Subtarget.hasVInstructionsF16()))
6979 return SplitVPOp(Op, DAG);
6980 [[fallthrough]];
6981 case ISD::VP_SRA:
6982 case ISD::VP_SRL:
6983 case ISD::VP_SHL:
6984 return lowerVPOp(Op, DAG);
6985 case ISD::VP_IS_FPCLASS:
6986 return LowerIS_FPCLASS(Op, DAG);
6987 case ISD::VP_SIGN_EXTEND:
6988 case ISD::VP_ZERO_EXTEND:
6989 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6990 return lowerVPExtMaskOp(Op, DAG);
6991 return lowerVPOp(Op, DAG);
6992 case ISD::VP_TRUNCATE:
6993 return lowerVectorTruncLike(Op, DAG);
6994 case ISD::VP_FP_EXTEND:
6995 case ISD::VP_FP_ROUND:
6996 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6997 case ISD::VP_SINT_TO_FP:
6998 case ISD::VP_UINT_TO_FP:
6999 if (Op.getValueType().isVector() &&
7000 Op.getValueType().getScalarType() == MVT::f16 &&
7001 (Subtarget.hasVInstructionsF16Minimal() &&
7002 !Subtarget.hasVInstructionsF16())) {
7003 if (Op.getValueType() == MVT::nxv32f16)
7004 return SplitVPOp(Op, DAG);
7005 // int -> f32
7006 SDLoc DL(Op);
7007 MVT NVT =
7008 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7009 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7010 // f32 -> f16
7011 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7012 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7013 }
7014 [[fallthrough]];
7015 case ISD::VP_FP_TO_SINT:
7016 case ISD::VP_FP_TO_UINT:
7017 if (SDValue Op1 = Op.getOperand(0);
7018 Op1.getValueType().isVector() &&
7019 Op1.getValueType().getScalarType() == MVT::f16 &&
7020 (Subtarget.hasVInstructionsF16Minimal() &&
7021 !Subtarget.hasVInstructionsF16())) {
7022 if (Op1.getValueType() == MVT::nxv32f16)
7023 return SplitVPOp(Op, DAG);
7024 // f16 -> f32
7025 SDLoc DL(Op);
7026 MVT NVT = MVT::getVectorVT(MVT::f32,
7027 Op1.getValueType().getVectorElementCount());
7028 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7029 // f32 -> int
7030 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7031 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7032 }
7033 return lowerVPFPIntConvOp(Op, DAG);
7034 case ISD::VP_SETCC:
7035 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7036 (Subtarget.hasVInstructionsF16Minimal() &&
7037 !Subtarget.hasVInstructionsF16()))
7038 return SplitVPOp(Op, DAG);
7039 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7040 return lowerVPSetCCMaskOp(Op, DAG);
7041 [[fallthrough]];
7042 case ISD::VP_SMIN:
7043 case ISD::VP_SMAX:
7044 case ISD::VP_UMIN:
7045 case ISD::VP_UMAX:
7046 case ISD::VP_BITREVERSE:
7047 case ISD::VP_BSWAP:
7048 return lowerVPOp(Op, DAG);
7049 case ISD::VP_CTLZ:
7050 case ISD::VP_CTLZ_ZERO_UNDEF:
7051 if (Subtarget.hasStdExtZvbb())
7052 return lowerVPOp(Op, DAG);
7053 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7054 case ISD::VP_CTTZ:
7055 case ISD::VP_CTTZ_ZERO_UNDEF:
7056 if (Subtarget.hasStdExtZvbb())
7057 return lowerVPOp(Op, DAG);
7058 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7059 case ISD::VP_CTPOP:
7060 return lowerVPOp(Op, DAG);
7061 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7062 return lowerVPStridedLoad(Op, DAG);
7063 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7064 return lowerVPStridedStore(Op, DAG);
7065 case ISD::VP_FCEIL:
7066 case ISD::VP_FFLOOR:
7067 case ISD::VP_FRINT:
7068 case ISD::VP_FNEARBYINT:
7069 case ISD::VP_FROUND:
7070 case ISD::VP_FROUNDEVEN:
7071 case ISD::VP_FROUNDTOZERO:
7072 if (Op.getValueType() == MVT::nxv32f16 &&
7073 (Subtarget.hasVInstructionsF16Minimal() &&
7074 !Subtarget.hasVInstructionsF16()))
7075 return SplitVPOp(Op, DAG);
7076 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7077 case ISD::VP_FMAXIMUM:
7078 case ISD::VP_FMINIMUM:
7079 if (Op.getValueType() == MVT::nxv32f16 &&
7080 (Subtarget.hasVInstructionsF16Minimal() &&
7081 !Subtarget.hasVInstructionsF16()))
7082 return SplitVPOp(Op, DAG);
7083 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7084 case ISD::EXPERIMENTAL_VP_SPLICE:
7085 return lowerVPSpliceExperimental(Op, DAG);
7086 case ISD::EXPERIMENTAL_VP_REVERSE:
7087 return lowerVPReverseExperimental(Op, DAG);
7088 case ISD::EXPERIMENTAL_VP_SPLAT:
7089 return lowerVPSplatExperimental(Op, DAG);
7090 case ISD::CLEAR_CACHE: {
7091 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7092 "llvm.clear_cache only needs custom lower on Linux targets");
7093 SDLoc DL(Op);
7094 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7095 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7096 Op.getOperand(2), Flags, DL);
7097 }
7098 }
7099}
7100
7101SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7102 SDValue Start, SDValue End,
7103 SDValue Flags, SDLoc DL) const {
7104 MakeLibCallOptions CallOptions;
7105 std::pair<SDValue, SDValue> CallResult =
7106 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7107 {Start, End, Flags}, CallOptions, DL, InChain);
7108
7109 // This function returns void so only the out chain matters.
7110 return CallResult.second;
7111}
7112
7113 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7114 SelectionDAG &DAG, unsigned Flags) {
7115 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7116}
7117
7118 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7119 SelectionDAG &DAG, unsigned Flags) {
7120 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7121 Flags);
7122}
7123
7124 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7125 SelectionDAG &DAG, unsigned Flags) {
7126 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7127 N->getOffset(), Flags);
7128}
7129
7130 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7131 SelectionDAG &DAG, unsigned Flags) {
7132 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7133}
7134
7135template <class NodeTy>
7136SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7137 bool IsLocal, bool IsExternWeak) const {
7138 SDLoc DL(N);
7139 EVT Ty = getPointerTy(DAG.getDataLayout());
7140
7141 // When HWASAN is used and tagging of global variables is enabled
7142 // they should be accessed via the GOT, since the tagged address of a global
7143 // is incompatible with existing code models. This also applies to non-pic
7144 // mode.
7145 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7146 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7147 if (IsLocal && !Subtarget.allowTaggedGlobals())
7148 // Use PC-relative addressing to access the symbol. This generates the
7149 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7150 // %pcrel_lo(auipc)).
7151 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7152
7153 // Use PC-relative addressing to access the GOT for this symbol, then load
7154 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7155 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
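// As assembly this is roughly (illustrative):
//   auipc aX, %got_pcrel_hi(sym)
//   ld    aX, %pcrel_lo(label)(aX)   (lw on RV32)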
7156 SDValue Load =
7157 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7158 MachineFunction &MF = DAG.getMachineFunction();
7159 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7160 MachinePointerInfo::getGOT(MF),
7161 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7162 MachineMemOperand::MOInvariant,
7163 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7164 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7165 return Load;
7166 }
7167
7168 switch (getTargetMachine().getCodeModel()) {
7169 default:
7170 report_fatal_error("Unsupported code model for lowering");
7171 case CodeModel::Small: {
7172 // Generate a sequence for accessing addresses within the first 2 GiB of
7173 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
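// As assembly this is roughly (illustrative):
//   lui  aX, %hi(sym)
//   addi aX, aX, %lo(sym)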
7174 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7175 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7176 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7177 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7178 }
7179 case CodeModel::Medium: {
7180 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7181 if (IsExternWeak) {
7182 // An extern weak symbol may be undefined, i.e. have value 0, which may
7183 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7184 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7185 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7186 SDValue Load =
7187 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7188 MachineFunction &MF = DAG.getMachineFunction();
7189 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7190 MachinePointerInfo::getGOT(MF),
7191 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7192 MachineMemOperand::MOInvariant,
7193 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7194 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7195 return Load;
7196 }
7197
7198 // Generate a sequence for accessing addresses within any 2GiB range within
7199 // the address space. This generates the pattern (PseudoLLA sym), which
7200 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7201 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7202 }
7203 }
7204}
7205
7206SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7207 SelectionDAG &DAG) const {
7208 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7209 assert(N->getOffset() == 0 && "unexpected offset in global node");
7210 const GlobalValue *GV = N->getGlobal();
7211 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7212}
7213
7214SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7215 SelectionDAG &DAG) const {
7216 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7217
7218 return getAddr(N, DAG);
7219}
7220
7221SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7222 SelectionDAG &DAG) const {
7223 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7224
7225 return getAddr(N, DAG);
7226}
7227
7228SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7229 SelectionDAG &DAG) const {
7230 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7231
7232 return getAddr(N, DAG);
7233}
7234
7235SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7236 SelectionDAG &DAG,
7237 bool UseGOT) const {
7238 SDLoc DL(N);
7239 EVT Ty = getPointerTy(DAG.getDataLayout());
7240 const GlobalValue *GV = N->getGlobal();
7241 MVT XLenVT = Subtarget.getXLenVT();
7242
7243 if (UseGOT) {
7244 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7245 // load the address from the GOT and add the thread pointer. This generates
7246 // the pattern (PseudoLA_TLS_IE sym), which expands to
7247 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7248 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7249 SDValue Load =
7250 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7251 MachineFunction &MF = DAG.getMachineFunction();
7252 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7253 MachinePointerInfo::getGOT(MF),
7254 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7255 MachineMemOperand::MOInvariant,
7256 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7257 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7258
7259 // Add the thread pointer.
7260 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7261 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7262 }
7263
7264 // Generate a sequence for accessing the address relative to the thread
7265 // pointer, with the appropriate adjustment for the thread pointer offset.
7266 // This generates the pattern
7267 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
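// As assembly this is roughly (illustrative):
//   lui  aX, %tprel_hi(sym)
//   add  aX, aX, tp, %tprel_add(sym)
//   addi aX, aX, %tprel_lo(sym)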
7268 SDValue AddrHi =
7269 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7270 SDValue AddrAdd =
7271 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7272 SDValue AddrLo =
7273 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7274
7275 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7276 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7277 SDValue MNAdd =
7278 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7279 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7280}
7281
7282SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7283 SelectionDAG &DAG) const {
7284 SDLoc DL(N);
7285 EVT Ty = getPointerTy(DAG.getDataLayout());
7286 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7287 const GlobalValue *GV = N->getGlobal();
7288
7289 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7290 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7291 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7292 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7293 SDValue Load =
7294 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7295
7296 // Prepare argument list to generate call.
7297 ArgListTy Args;
7298 ArgListEntry Entry;
7299 Entry.Node = Load;
7300 Entry.Ty = CallTy;
7301 Args.push_back(Entry);
7302
7303 // Setup call to __tls_get_addr.
7304 TargetLowering::CallLoweringInfo CLI(DAG);
7305 CLI.setDebugLoc(DL)
7306 .setChain(DAG.getEntryNode())
7307 .setLibCallee(CallingConv::C, CallTy,
7308 DAG.getExternalSymbol("__tls_get_addr", Ty),
7309 std::move(Args));
7310
7311 return LowerCallTo(CLI).first;
7312}
7313
7314SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7315 SelectionDAG &DAG) const {
7316 SDLoc DL(N);
7317 EVT Ty = getPointerTy(DAG.getDataLayout());
7318 const GlobalValue *GV = N->getGlobal();
7319
7320 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7321 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7322 //
7323 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7324 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7325 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7326 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7327 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7328 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7329}
7330
7331SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7332 SelectionDAG &DAG) const {
7333 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7334 assert(N->getOffset() == 0 && "unexpected offset in global node");
7335
7336 if (DAG.getTarget().useEmulatedTLS())
7337 return LowerToTLSEmulatedModel(N, DAG);
7338
7339 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7340
7341 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7342 CallingConv::GHC)
7343 report_fatal_error("In GHC calling convention TLS is not supported");
7344
7345 SDValue Addr;
7346 switch (Model) {
7347 case TLSModel::LocalExec:
7348 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7349 break;
7350 case TLSModel::InitialExec:
7351 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7352 break;
7353 case TLSModel::LocalDynamic:
7354 case TLSModel::GeneralDynamic:
7355 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7356 : getDynamicTLSAddr(N, DAG);
7357 break;
7358 }
7359
7360 return Addr;
7361}
7362
7363// Return true if Val is equal to (setcc LHS, RHS, CC).
7364// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7365// Otherwise, return std::nullopt.
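// For example, matchSetCC(a, b, SETLT, (setcc a, b, SETLT)) returns true,
// matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false (the inverse),
// and unrelated operands yield std::nullopt.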
7366static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7367 ISD::CondCode CC, SDValue Val) {
7368 assert(Val->getOpcode() == ISD::SETCC);
7369 SDValue LHS2 = Val.getOperand(0);
7370 SDValue RHS2 = Val.getOperand(1);
7371 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7372
7373 if (LHS == LHS2 && RHS == RHS2) {
7374 if (CC == CC2)
7375 return true;
7376 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7377 return false;
7378 } else if (LHS == RHS2 && RHS == LHS2) {
7379 CC2 = ISD::getSetCCSwappedOperands(CC2);
7380 if (CC == CC2)
7381 return true;
7382 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7383 return false;
7384 }
7385
7386 return std::nullopt;
7387}
7388
7389 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7390 const RISCVSubtarget &Subtarget) {
7391 SDValue CondV = N->getOperand(0);
7392 SDValue TrueV = N->getOperand(1);
7393 SDValue FalseV = N->getOperand(2);
7394 MVT VT = N->getSimpleValueType(0);
7395 SDLoc DL(N);
7396
7397 if (!Subtarget.hasConditionalMoveFusion()) {
7398 // (select c, -1, y) -> -c | y
7399 if (isAllOnesConstant(TrueV)) {
7400 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7401 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7402 }
7403 // (select c, y, -1) -> (c-1) | y
7404 if (isAllOnesConstant(FalseV)) {
7405 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7406 DAG.getAllOnesConstant(DL, VT));
7407 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7408 }
7409
7410 // (select c, 0, y) -> (c-1) & y
7411 if (isNullConstant(TrueV)) {
7412 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7413 DAG.getAllOnesConstant(DL, VT));
7414 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7415 }
7416 // (select c, y, 0) -> -c & y
7417 if (isNullConstant(FalseV)) {
7418 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7419 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7420 }
7421 }
7422
7423 // select c, ~x, x --> xor -c, x
7424 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7425 const APInt &TrueVal = TrueV->getAsAPIntVal();
7426 const APInt &FalseVal = FalseV->getAsAPIntVal();
7427 if (~TrueVal == FalseVal) {
7428 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7429 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7430 }
7431 }
7432
7433 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7434 // when both truev and falsev are also setcc.
7435 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7436 FalseV.getOpcode() == ISD::SETCC) {
7437 SDValue LHS = CondV.getOperand(0);
7438 SDValue RHS = CondV.getOperand(1);
7439 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7440
7441 // (select x, x, y) -> x | y
7442 // (select !x, x, y) -> x & y
7443 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7444 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7445 DAG.getFreeze(FalseV));
7446 }
7447 // (select x, y, x) -> x & y
7448 // (select !x, y, x) -> x | y
7449 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7450 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7451 DAG.getFreeze(TrueV), FalseV);
7452 }
7453 }
7454
7455 return SDValue();
7456}
7457
7458// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7459// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7460// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7461// being `0` or `-1`. In such cases we can replace `select` with `and`.
7462// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7463// than `c0`?
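// For example, (add (select cond, x, -1), 1) -> (select cond, (add x, 1), 0),
// since binOp(c0, c1) = -1 + 1 = 0.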
7464static SDValue
7465 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7466 const RISCVSubtarget &Subtarget) {
7467 if (Subtarget.hasShortForwardBranchOpt())
7468 return SDValue();
7469
7470 unsigned SelOpNo = 0;
7471 SDValue Sel = BO->getOperand(0);
7472 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7473 SelOpNo = 1;
7474 Sel = BO->getOperand(1);
7475 }
7476
7477 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7478 return SDValue();
7479
7480 unsigned ConstSelOpNo = 1;
7481 unsigned OtherSelOpNo = 2;
7482 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7483 ConstSelOpNo = 2;
7484 OtherSelOpNo = 1;
7485 }
7486 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7487 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7488 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7489 return SDValue();
7490
7491 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7492 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7493 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7494 return SDValue();
7495
7496 SDLoc DL(Sel);
7497 EVT VT = BO->getValueType(0);
7498
7499 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7500 if (SelOpNo == 1)
7501 std::swap(NewConstOps[0], NewConstOps[1]);
7502
7503 SDValue NewConstOp =
7504 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7505 if (!NewConstOp)
7506 return SDValue();
7507
7508 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7509 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7510 return SDValue();
7511
7512 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7513 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7514 if (SelOpNo == 1)
7515 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7516 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7517
7518 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7519 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7520 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7521}
7522
7523SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7524 SDValue CondV = Op.getOperand(0);
7525 SDValue TrueV = Op.getOperand(1);
7526 SDValue FalseV = Op.getOperand(2);
7527 SDLoc DL(Op);
7528 MVT VT = Op.getSimpleValueType();
7529 MVT XLenVT = Subtarget.getXLenVT();
7530
7531 // Lower vector SELECTs to VSELECTs by splatting the condition.
7532 if (VT.isVector()) {
7533 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7534 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7535 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7536 }
7537
7538 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7539 // nodes to implement the SELECT. Performing the lowering here allows for
7540 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7541 // sequence or RISCVISD::SELECT_CC node (branch-based select).
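// Recall czero.eqz rd, rs1, rs2 yields (rs2 != 0) ? rs1 : 0, and czero.nez
// yields (rs2 != 0) ? 0 : rs1, so e.g. (select c, t, 0) maps directly to
// czero.eqz.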
7542 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7543 VT.isScalarInteger()) {
7544 // (select c, t, 0) -> (czero_eqz t, c)
7545 if (isNullConstant(FalseV))
7546 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7547 // (select c, 0, f) -> (czero_nez f, c)
7548 if (isNullConstant(TrueV))
7549 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7550
7551 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7552 if (TrueV.getOpcode() == ISD::AND &&
7553 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7554 return DAG.getNode(
7555 ISD::OR, DL, VT, TrueV,
7556 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7557 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7558 if (FalseV.getOpcode() == ISD::AND &&
7559 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7560 return DAG.getNode(
7561 ISD::OR, DL, VT, FalseV,
7562 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7563
7564 // Try some other optimizations before falling back to generic lowering.
7565 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7566 return V;
7567
7568 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7569 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
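// e.g. for (select c, 5, 3) with 5 no more costly to materialize than 3:
// (add (czero_nez (3 - 5), c), 5), which yields 5 when c != 0 and 3 otherwise.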
7570 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7571 const APInt &TrueVal = TrueV->getAsAPIntVal();
7572 const APInt &FalseVal = FalseV->getAsAPIntVal();
7573 const int TrueValCost = RISCVMatInt::getIntMatCost(
7574 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7575 const int FalseValCost = RISCVMatInt::getIntMatCost(
7576 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7577 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7578 SDValue LHSVal = DAG.getConstant(
7579 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7580 SDValue RHSVal =
7581 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7582 SDValue CMOV =
7583 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7584 DL, VT, LHSVal, CondV);
7585 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7586 }
7587
7588 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7589 // Unless we have the short forward branch optimization.
7590 if (!Subtarget.hasConditionalMoveFusion())
7591 return DAG.getNode(
7592 ISD::OR, DL, VT,
7593 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7594 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7595 }
7596
7597 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7598 return V;
7599
7600 if (Op.hasOneUse()) {
7601 unsigned UseOpc = Op->use_begin()->getOpcode();
7602 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7603 SDNode *BinOp = *Op->use_begin();
7604 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7605 DAG, Subtarget)) {
7606 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7607 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
7608 // may return a constant node and cause crash in lowerSELECT.
7609 if (NewSel.getOpcode() == ISD::SELECT)
7610 return lowerSELECT(NewSel, DAG);
7611 return NewSel;
7612 }
7613 }
7614 }
7615
7616 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7617 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7618 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7619 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7620 if (FPTV && FPFV) {
7621 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7622 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7623 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7624 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7625 DAG.getConstant(1, DL, XLenVT));
7626 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7627 }
7628 }
7629
7630 // If the condition is not an integer SETCC which operates on XLenVT, we need
7631 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7632 // (select condv, truev, falsev)
7633 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7634 if (CondV.getOpcode() != ISD::SETCC ||
7635 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7636 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7637 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7638
7639 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7640
7641 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7642 }
7643
7644 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7645 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7646 // advantage of the integer compare+branch instructions. i.e.:
7647 // (select (setcc lhs, rhs, cc), truev, falsev)
7648 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7649 SDValue LHS = CondV.getOperand(0);
7650 SDValue RHS = CondV.getOperand(1);
7651 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7652
7653 // Special case for a select of 2 constants that have a difference of 1.
7654 // Normally this is done by DAGCombine, but if the select is introduced by
7655 // type legalization or op legalization, we miss it. Restricting to SETLT
7656 // case for now because that is what signed saturating add/sub need.
7657 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7658 // but we would probably want to swap the true/false values if the condition
7659 // is SETGE/SETLE to avoid an XORI.
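// e.g. (select (setlt a, b), 4, 3) -> (add (setlt a, b), 3).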
7660 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7661 CCVal == ISD::SETLT) {
7662 const APInt &TrueVal = TrueV->getAsAPIntVal();
7663 const APInt &FalseVal = FalseV->getAsAPIntVal();
7664 if (TrueVal - 1 == FalseVal)
7665 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7666 if (TrueVal + 1 == FalseVal)
7667 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7668 }
7669
7670 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7671 // 1 < x ? x : 1 -> 0 < x ? x : 1
7672 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7673 RHS == TrueV && LHS == FalseV) {
7674 LHS = DAG.getConstant(0, DL, VT);
7675 // 0 <u x is the same as x != 0.
7676 if (CCVal == ISD::SETULT) {
7677 std::swap(LHS, RHS);
7678 CCVal = ISD::SETNE;
7679 }
7680 }
7681
7682 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7683 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7684 RHS == FalseV) {
7685 RHS = DAG.getConstant(0, DL, VT);
7686 }
7687
7688 SDValue TargetCC = DAG.getCondCode(CCVal);
7689
7690 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7691 // (select (setcc lhs, rhs, CC), constant, falsev)
7692 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7693 std::swap(TrueV, FalseV);
7694 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7695 }
7696
7697 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7698 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7699}
7700
7701SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7702 SDValue CondV = Op.getOperand(1);
7703 SDLoc DL(Op);
7704 MVT XLenVT = Subtarget.getXLenVT();
7705
7706 if (CondV.getOpcode() == ISD::SETCC &&
7707 CondV.getOperand(0).getValueType() == XLenVT) {
7708 SDValue LHS = CondV.getOperand(0);
7709 SDValue RHS = CondV.getOperand(1);
7710 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7711
7712 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7713
7714 SDValue TargetCC = DAG.getCondCode(CCVal);
7715 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7716 LHS, RHS, TargetCC, Op.getOperand(2));
7717 }
7718
7719 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7720 CondV, DAG.getConstant(0, DL, XLenVT),
7721 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7722}
7723
7724SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7725 MachineFunction &MF = DAG.getMachineFunction();
7726 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7727
7728 SDLoc DL(Op);
7729 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7730 getPointerTy(MF.getDataLayout()));
7731
7732 // vastart just stores the address of the VarArgsFrameIndex slot into the
7733 // memory location argument.
7734 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7735 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7736 MachinePointerInfo(SV));
7737}
7738
7739SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7740 SelectionDAG &DAG) const {
7741 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7742 MachineFunction &MF = DAG.getMachineFunction();
7743 MachineFrameInfo &MFI = MF.getFrameInfo();
7744 MFI.setFrameAddressIsTaken(true);
7745 Register FrameReg = RI.getFrameRegister(MF);
7746 int XLenInBytes = Subtarget.getXLen() / 8;
7747
7748 EVT VT = Op.getValueType();
7749 SDLoc DL(Op);
7750 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7751 unsigned Depth = Op.getConstantOperandVal(0);
7752 while (Depth--) {
7753 int Offset = -(XLenInBytes * 2);
7754 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7755 DAG.getSignedConstant(Offset, DL, VT));
7756 FrameAddr =
7757 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7758 }
7759 return FrameAddr;
7760}
7761
7762SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7763 SelectionDAG &DAG) const {
7764 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7765 MachineFunction &MF = DAG.getMachineFunction();
7766 MachineFrameInfo &MFI = MF.getFrameInfo();
7767 MFI.setReturnAddressIsTaken(true);
7768 MVT XLenVT = Subtarget.getXLenVT();
7769 int XLenInBytes = Subtarget.getXLen() / 8;
7770
7771 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7772 return SDValue();
7773
7774 EVT VT = Op.getValueType();
7775 SDLoc DL(Op);
7776 unsigned Depth = Op.getConstantOperandVal(0);
7777 if (Depth) {
7778 int Off = -XLenInBytes;
7779 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7780 SDValue Offset = DAG.getConstant(Off, DL, VT);
7781 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7782 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7783 MachinePointerInfo());
7784 }
7785
7786 // Return the value of the return address register, marking it an implicit
7787 // live-in.
7788 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7789 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7790}
7791
7792SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7793 SelectionDAG &DAG) const {
7794 SDLoc DL(Op);
7795 SDValue Lo = Op.getOperand(0);
7796 SDValue Hi = Op.getOperand(1);
7797 SDValue Shamt = Op.getOperand(2);
7798 EVT VT = Lo.getValueType();
7799
7800 // if Shamt-XLEN < 0: // Shamt < XLEN
7801 // Lo = Lo << Shamt
7802 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7803 // else:
7804 // Lo = 0
7805 // Hi = Lo << (Shamt-XLEN)
7806
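// Note: Lo is shifted right by 1 and then by (XLEN-1 - Shamt), rather than by
// (XLEN - Shamt) directly, presumably so the shift amount never reaches XLEN
// (which would be out of range) when Shamt is 0.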
7807 SDValue Zero = DAG.getConstant(0, DL, VT);
7808 SDValue One = DAG.getConstant(1, DL, VT);
7809 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7810 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7811 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7812 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7813
7814 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7815 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7816 SDValue ShiftRightLo =
7817 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7818 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7819 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7820 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7821
7822 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7823
7824 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7825 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7826
7827 SDValue Parts[2] = {Lo, Hi};
7828 return DAG.getMergeValues(Parts, DL);
7829}
7830
7831SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7832 bool IsSRA) const {
7833 SDLoc DL(Op);
7834 SDValue Lo = Op.getOperand(0);
7835 SDValue Hi = Op.getOperand(1);
7836 SDValue Shamt = Op.getOperand(2);
7837 EVT VT = Lo.getValueType();
7838
7839 // SRA expansion:
7840 // if Shamt-XLEN < 0: // Shamt < XLEN
7841 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7842 // Hi = Hi >>s Shamt
7843 // else:
7844 // Lo = Hi >>s (Shamt-XLEN);
7845 // Hi = Hi >>s (XLEN-1)
7846 //
7847 // SRL expansion:
7848 // if Shamt-XLEN < 0: // Shamt < XLEN
7849 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7850 // Hi = Hi >>u Shamt
7851 // else:
7852 // Lo = Hi >>u (Shamt-XLEN);
7853 // Hi = 0;
7854
7855 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7856
7857 SDValue Zero = DAG.getConstant(0, DL, VT);
7858 SDValue One = DAG.getConstant(1, DL, VT);
7859 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7860 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7861 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7862 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7863
7864 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7865 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7866 SDValue ShiftLeftHi =
7867 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7868 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7869 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7870 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7871 SDValue HiFalse =
7872 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7873
7874 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7875
7876 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7877 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7878
7879 SDValue Parts[2] = {Lo, Hi};
7880 return DAG.getMergeValues(Parts, DL);
7881}
7882
7883// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7884// legal equivalently-sized i8 type, so we can use that as a go-between.
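// e.g. (nxv4i1 splat x) -> (setcc (nxv4i8 splat (and x, 1)), (nxv4i8 splat 0), ne).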
7885SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7886 SelectionDAG &DAG) const {
7887 SDLoc DL(Op);
7888 MVT VT = Op.getSimpleValueType();
7889 SDValue SplatVal = Op.getOperand(0);
7890 // All-zeros or all-ones splats are handled specially.
7891 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7892 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7893 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7894 }
7895 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7896 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7897 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7898 }
7899 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7900 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7901 DAG.getConstant(1, DL, SplatVal.getValueType()));
7902 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7903 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7904 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7905}
7906
7907// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7908// illegal (currently only vXi64 RV32).
7909// FIXME: We could also catch non-constant sign-extended i32 values and lower
7910// them to VMV_V_X_VL.
7911SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7912 SelectionDAG &DAG) const {
7913 SDLoc DL(Op);
7914 MVT VecVT = Op.getSimpleValueType();
7915 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7916 "Unexpected SPLAT_VECTOR_PARTS lowering");
7917
7918 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7919 SDValue Lo = Op.getOperand(0);
7920 SDValue Hi = Op.getOperand(1);
7921
7922 MVT ContainerVT = VecVT;
7923 if (VecVT.isFixedLengthVector())
7924 ContainerVT = getContainerForFixedLengthVector(VecVT);
7925
7926 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7927
7928 SDValue Res =
7929 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7930
7931 if (VecVT.isFixedLengthVector())
7932 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7933
7934 return Res;
7935}
7936
7937// Custom-lower extensions from mask vectors by using a vselect either with 1
7938// for zero/any-extension or -1 for sign-extension:
7939// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7940// Note that any-extension is lowered identically to zero-extension.
7941SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7942 int64_t ExtTrueVal) const {
7943 SDLoc DL(Op);
7944 MVT VecVT = Op.getSimpleValueType();
7945 SDValue Src = Op.getOperand(0);
7946 // Only custom-lower extensions from mask types
7947 assert(Src.getValueType().isVector() &&
7948 Src.getValueType().getVectorElementType() == MVT::i1);
7949
7950 if (VecVT.isScalableVector()) {
7951 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7952 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7953 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7954 }
7955
7956 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7957 MVT I1ContainerVT =
7958 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7959
7960 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7961
7962 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7963
7964 MVT XLenVT = Subtarget.getXLenVT();
7965 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7966 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7967
7968 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7969 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7970 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7971 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7972 SDValue Select =
7973 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7974 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7975
7976 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7977}
7978
7979SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7980 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7981 MVT ExtVT = Op.getSimpleValueType();
7982 // Only custom-lower extensions from fixed-length vector types.
7983 if (!ExtVT.isFixedLengthVector())
7984 return Op;
7985 MVT VT = Op.getOperand(0).getSimpleValueType();
7986 // Grab the canonical container type for the extended type. Infer the smaller
7987 // type from that to ensure the same number of vector elements, as we know
7988 // the LMUL will be sufficient to hold the smaller type.
7989 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7990 // Get the extended container type manually to ensure the same number of
7991 // vector elements between source and dest.
7992 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7993 ContainerExtVT.getVectorElementCount());
7994
7995 SDValue Op1 =
7996 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7997
7998 SDLoc DL(Op);
7999 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8000
8001 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8002
8003 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8004}
8005
8006// Custom-lower truncations from vectors to mask vectors by using a mask and a
8007// setcc operation:
8008// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8009SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8010 SelectionDAG &DAG) const {
8011 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8012 SDLoc DL(Op);
8013 EVT MaskVT = Op.getValueType();
8014 // Only expect to custom-lower truncations to mask types
8015 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8016 "Unexpected type for vector mask lowering");
8017 SDValue Src = Op.getOperand(0);
8018 MVT VecVT = Src.getSimpleValueType();
8019 SDValue Mask, VL;
8020 if (IsVPTrunc) {
8021 Mask = Op.getOperand(1);
8022 VL = Op.getOperand(2);
8023 }
8024 // If this is a fixed vector, we need to convert it to a scalable vector.
8025 MVT ContainerVT = VecVT;
8026
8027 if (VecVT.isFixedLengthVector()) {
8028 ContainerVT = getContainerForFixedLengthVector(VecVT);
8029 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8030 if (IsVPTrunc) {
8031 MVT MaskContainerVT =
8032 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8033 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8034 }
8035 }
8036
8037 if (!IsVPTrunc) {
8038 std::tie(Mask, VL) =
8039 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8040 }
8041
8042 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8043 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8044
8045 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8046 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8047 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8048 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8049
8050 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8051 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8052 DAG.getUNDEF(ContainerVT), Mask, VL);
8053 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8054 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8055 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8056 if (MaskVT.isFixedLengthVector())
8057 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8058 return Trunc;
8059}
8060
8061SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8062 SelectionDAG &DAG) const {
8063 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8064 SDLoc DL(Op);
8065
8066 MVT VT = Op.getSimpleValueType();
8067 // Only custom-lower vector truncates
8068 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8069
8070 // Truncates to mask types are handled differently
8071 if (VT.getVectorElementType() == MVT::i1)
8072 return lowerVectorMaskTruncLike(Op, DAG);
8073
8074 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8075 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8076 // truncate by one power of two at a time.
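// e.g. truncating nxv2i64 to nxv2i8 emits three TRUNCATE_VECTOR_VL steps:
// i64 -> i32 -> i16 -> i8.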
8077 MVT DstEltVT = VT.getVectorElementType();
8078
8079 SDValue Src = Op.getOperand(0);
8080 MVT SrcVT = Src.getSimpleValueType();
8081 MVT SrcEltVT = SrcVT.getVectorElementType();
8082
8083 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8084 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8085 "Unexpected vector truncate lowering");
8086
8087 MVT ContainerVT = SrcVT;
8088 SDValue Mask, VL;
8089 if (IsVPTrunc) {
8090 Mask = Op.getOperand(1);
8091 VL = Op.getOperand(2);
8092 }
8093 if (SrcVT.isFixedLengthVector()) {
8094 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8095 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8096 if (IsVPTrunc) {
8097 MVT MaskVT = getMaskTypeFor(ContainerVT);
8098 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8099 }
8100 }
8101
8102 SDValue Result = Src;
8103 if (!IsVPTrunc) {
8104 std::tie(Mask, VL) =
8105 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8106 }
8107
8108 do {
8109 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8110 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8111 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8112 Mask, VL);
8113 } while (SrcEltVT != DstEltVT);
8114
8115 if (SrcVT.isFixedLengthVector())
8116 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8117
8118 return Result;
8119}
8120
8121SDValue
8122RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8123 SelectionDAG &DAG) const {
8124 SDLoc DL(Op);
8125 SDValue Chain = Op.getOperand(0);
8126 SDValue Src = Op.getOperand(1);
8127 MVT VT = Op.getSimpleValueType();
8128 MVT SrcVT = Src.getSimpleValueType();
8129 MVT ContainerVT = VT;
8130 if (VT.isFixedLengthVector()) {
8131 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8132 ContainerVT =
8133 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8134 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8135 }
8136
8137 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8138
8139 // RVV can only widen/truncate fp to types double/half the size of the source.
8140 if ((VT.getVectorElementType() == MVT::f64 &&
8141 (SrcVT.getVectorElementType() == MVT::f16 ||
8142 SrcVT.getVectorElementType() == MVT::bf16)) ||
8143 ((VT.getVectorElementType() == MVT::f16 ||
8144 VT.getVectorElementType() == MVT::bf16) &&
8145 SrcVT.getVectorElementType() == MVT::f64)) {
8146 // For double rounding, the intermediate rounding should be round-to-odd.
8147 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8148 ? RISCVISD::STRICT_FP_EXTEND_VL
8149 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8150 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8151 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8152 Chain, Src, Mask, VL);
8153 Chain = Src.getValue(1);
8154 }
8155
8156 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8157 ? RISCVISD::STRICT_FP_EXTEND_VL
8158 : RISCVISD::STRICT_FP_ROUND_VL;
8159 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8160 Chain, Src, Mask, VL);
8161 if (VT.isFixedLengthVector()) {
8162 // StrictFP operations have two result values, so their lowered form should
8163 // also produce two results.
8164 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8165 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8166 }
8167 return Res;
8168}
8169
8170SDValue
8171RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8172 SelectionDAG &DAG) const {
8173 bool IsVP =
8174 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8175 bool IsExtend =
8176 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8177 // RVV can only truncate fp to types half the size of the source. We
8178 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8179 // conversion instruction.
8180 SDLoc DL(Op);
8181 MVT VT = Op.getSimpleValueType();
8182
8183 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8184
8185 SDValue Src = Op.getOperand(0);
8186 MVT SrcVT = Src.getSimpleValueType();
8187
8188 bool IsDirectExtend =
8189 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8190 (SrcVT.getVectorElementType() != MVT::f16 &&
8191 SrcVT.getVectorElementType() != MVT::bf16));
8192 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8193 VT.getVectorElementType() != MVT::bf16) ||
8194 SrcVT.getVectorElementType() != MVT::f64);
8195
8196 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8197
8198 // Prepare any fixed-length vector operands.
8199 MVT ContainerVT = VT;
8200 SDValue Mask, VL;
8201 if (IsVP) {
8202 Mask = Op.getOperand(1);
8203 VL = Op.getOperand(2);
8204 }
8205 if (VT.isFixedLengthVector()) {
8206 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8207 ContainerVT =
8208 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8209 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8210 if (IsVP) {
8211 MVT MaskVT = getMaskTypeFor(ContainerVT);
8212 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8213 }
8214 }
8215
8216 if (!IsVP)
8217 std::tie(Mask, VL) =
8218 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8219
8220 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8221
8222 if (IsDirectConv) {
8223 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8224 if (VT.isFixedLengthVector())
8225 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8226 return Src;
8227 }
8228
8229 unsigned InterConvOpc =
8230 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8231
8232 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8233 SDValue IntermediateConv =
8234 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8235 SDValue Result =
8236 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8237 if (VT.isFixedLengthVector())
8238 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8239 return Result;
8240}
8241
8242// Given a scalable vector type and an index into it, returns the type for the
8243// smallest subvector that the index fits in. This can be used to reduce LMUL
8244// for operations like vslidedown.
8245//
8246// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
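// (There MinVLMAX = 128/32 = 4, so indices 0-3 fit in an LMUL1 type, indices
// 4-7 in LMUL2, and indices 8-15 in LMUL4; anything larger keeps the
// original type.)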
8247static std::optional<MVT>
8248getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8249 const RISCVSubtarget &Subtarget) {
8250 assert(VecVT.isScalableVector());
8251 const unsigned EltSize = VecVT.getScalarSizeInBits();
8252 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8253 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8254 MVT SmallerVT;
8255 if (MaxIdx < MinVLMAX)
8256 SmallerVT = getLMUL1VT(VecVT);
8257 else if (MaxIdx < MinVLMAX * 2)
8258 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8259 else if (MaxIdx < MinVLMAX * 4)
8260 SmallerVT = getLMUL1VT(VecVT)
8261 .getDoubleNumVectorElementsVT()
8262 .getDoubleNumVectorElementsVT();
8263 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8264 return std::nullopt;
8265 return SmallerVT;
8266}
8267
8268// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8269// first position of a vector, and that vector is slid up to the insert index.
8270// By limiting the active vector length to index+1 and merging with the
8271// original vector (with an undisturbed tail policy for elements >= VL), we
8272// achieve the desired result of leaving all elements untouched except the one
8273// at VL-1, which is replaced with the desired value.
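// (Worked example: inserting at index 2 places the scalar in element 0 of a
// temporary, then a vslideup by 2 with VL = 3 and a tail-undisturbed policy
// rewrites only element 2 of the original vector.)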
8274SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8275 SelectionDAG &DAG) const {
8276 SDLoc DL(Op);
8277 MVT VecVT = Op.getSimpleValueType();
8278 SDValue Vec = Op.getOperand(0);
8279 SDValue Val = Op.getOperand(1);
8280 SDValue Idx = Op.getOperand(2);
8281
8282 if (VecVT.getVectorElementType() == MVT::i1) {
8283 // FIXME: For now we just promote to an i8 vector and insert into that,
8284 // but this is probably not optimal.
8285 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8286 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8287 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8288 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8289 }
8290
8291 MVT ContainerVT = VecVT;
8292 // If the operand is a fixed-length vector, convert to a scalable one.
8293 if (VecVT.isFixedLengthVector()) {
8294 ContainerVT = getContainerForFixedLengthVector(VecVT);
8295 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8296 }
8297
8298 // If we know the index we're going to insert at, we can shrink Vec so that
8299 // we're performing the scalar inserts and slideup on a smaller LMUL.
8300 MVT OrigContainerVT = ContainerVT;
8301 SDValue OrigVec = Vec;
8302 SDValue AlignedIdx;
8303 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8304 const unsigned OrigIdx = IdxC->getZExtValue();
8305 // Do we know an upper bound on LMUL?
8306 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8307 DL, DAG, Subtarget)) {
8308 ContainerVT = *ShrunkVT;
8309 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8310 }
8311
8312 // If we're compiling for an exact VLEN value, we can always perform
8313 // the insert in m1 as we can determine the register corresponding to
8314 // the index in the register group.
8315 const MVT M1VT = getLMUL1VT(ContainerVT);
8316 if (auto VLEN = Subtarget.getRealVLen();
8317 VLEN && ContainerVT.bitsGT(M1VT)) {
8318 EVT ElemVT = VecVT.getVectorElementType();
8319 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8320 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8321 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8322 unsigned ExtractIdx =
8323 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8324 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8325 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8326 ContainerVT = M1VT;
8327 }
8328
8329 if (AlignedIdx)
8330 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8331 AlignedIdx);
8332 }
8333
8334 MVT XLenVT = Subtarget.getXLenVT();
8335
8336 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8337 // Even i64-element vectors on RV32 can be lowered without scalar
8338 // legalization if the most-significant 32 bits of the value are not affected
8339 // by the sign-extension of the lower 32 bits.
8340 // TODO: We could also catch sign extensions of a 32-bit value.
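// (E.g. the i64 constant -1 passes the isInt<32> check below and can be
// inserted from a single 32-bit scalar on RV32.)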
8341 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8342 const auto *CVal = cast<ConstantSDNode>(Val);
8343 if (isInt<32>(CVal->getSExtValue())) {
8344 IsLegalInsert = true;
8345 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8346 }
8347 }
8348
8349 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8350
8351 SDValue ValInVec;
8352
8353 if (IsLegalInsert) {
8354 unsigned Opc =
8355 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8356 if (isNullConstant(Idx)) {
8357 if (!VecVT.isFloatingPoint())
8358 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8359 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8360
8361 if (AlignedIdx)
8362 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8363 Vec, AlignedIdx);
8364 if (!VecVT.isFixedLengthVector())
8365 return Vec;
8366 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8367 }
8368 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8369 } else {
8370 // On RV32, i64-element vectors must be specially handled to place the
8371 // value at element 0, by using two vslide1down instructions in sequence on
8372 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8373 // this.
8374 SDValue ValLo, ValHi;
8375 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8376 MVT I32ContainerVT =
8377 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8378 SDValue I32Mask =
8379 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8380 // Limit the active VL to two.
8381 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8382 // If the Idx is 0 we can insert directly into the vector.
8383 if (isNullConstant(Idx)) {
8384 // First slide in the lo value, then the hi in above it. We use slide1down
8385 // to avoid the register group overlap constraint of vslide1up.
8386 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8387 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8388 // If the source vector is undef don't pass along the tail elements from
8389 // the previous slide1down.
8390 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8391 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8392 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8393 // Bitcast back to the right container type.
8394 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8395
8396 if (AlignedIdx)
8397 ValInVec =
8398 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8399 ValInVec, AlignedIdx);
8400 if (!VecVT.isFixedLengthVector())
8401 return ValInVec;
8402 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8403 }
8404
8405 // First slide in the lo value, then the hi in above it. We use slide1down
8406 // to avoid the register group overlap constraint of vslide1up.
8407 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8408 DAG.getUNDEF(I32ContainerVT),
8409 DAG.getUNDEF(I32ContainerVT), ValLo,
8410 I32Mask, InsertI64VL);
8411 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8412 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8413 I32Mask, InsertI64VL);
8414 // Bitcast back to the right container type.
8415 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8416 }
8417
8418 // Now that the value is in a vector, slide it into position.
8419 SDValue InsertVL =
8420 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8421
8422 // Use tail agnostic policy if Idx is the last index of Vec.
8423 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8424 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8425 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8426 Policy = RISCVII::TAIL_AGNOSTIC;
8427 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8428 Idx, Mask, InsertVL, Policy);
8429
8430 if (AlignedIdx)
8431 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8432 Slideup, AlignedIdx);
8433 if (!VecVT.isFixedLengthVector())
8434 return Slideup;
8435 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8436}
8437
8438// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8439// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8440// types this is done using VMV_X_S to allow us to glean information about the
8441// sign bits of the result.
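// (E.g. extracting element 5 is lowered as a vslidedown by 5 with VL = 1
// followed by vmv.x.s for integer elements; FP elements use a plain
// element-0 extract.)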
8442SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8443 SelectionDAG &DAG) const {
8444 SDLoc DL(Op);
8445 SDValue Idx = Op.getOperand(1);
8446 SDValue Vec = Op.getOperand(0);
8447 EVT EltVT = Op.getValueType();
8448 MVT VecVT = Vec.getSimpleValueType();
8449 MVT XLenVT = Subtarget.getXLenVT();
8450
8451 if (VecVT.getVectorElementType() == MVT::i1) {
8452 // Use vfirst.m to extract the first bit.
8453 if (isNullConstant(Idx)) {
8454 MVT ContainerVT = VecVT;
8455 if (VecVT.isFixedLengthVector()) {
8456 ContainerVT = getContainerForFixedLengthVector(VecVT);
8457 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8458 }
8459 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8460 SDValue Vfirst =
8461 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8462 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8463 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8464 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8465 }
8466 if (VecVT.isFixedLengthVector()) {
8467 unsigned NumElts = VecVT.getVectorNumElements();
8468 if (NumElts >= 8) {
8469 MVT WideEltVT;
8470 unsigned WidenVecLen;
8471 SDValue ExtractElementIdx;
8472 SDValue ExtractBitIdx;
8473 unsigned MaxEEW = Subtarget.getELen();
8474 MVT LargestEltVT = MVT::getIntegerVT(
8475 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8476 if (NumElts <= LargestEltVT.getSizeInBits()) {
8477 assert(isPowerOf2_32(NumElts) &&
8478 "the number of elements should be power of 2");
8479 WideEltVT = MVT::getIntegerVT(NumElts);
8480 WidenVecLen = 1;
8481 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8482 ExtractBitIdx = Idx;
8483 } else {
8484 WideEltVT = LargestEltVT;
8485 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8486 // extract element index = index / element width
8487 ExtractElementIdx = DAG.getNode(
8488 ISD::SRL, DL, XLenVT, Idx,
8489 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8490 // mask bit index = index % element width
8491 ExtractBitIdx = DAG.getNode(
8492 ISD::AND, DL, XLenVT, Idx,
8493 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8494 }
8495 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8496 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8497 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8498 Vec, ExtractElementIdx);
8499 // Extract the bit from GPR.
8500 SDValue ShiftRight =
8501 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8502 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8503 DAG.getConstant(1, DL, XLenVT));
8504 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8505 }
8506 }
8507 // Otherwise, promote to an i8 vector and extract from that.
8508 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8509 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8510 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8511 }
8512
8513 // If this is a fixed vector, we need to convert it to a scalable vector.
8514 MVT ContainerVT = VecVT;
8515 if (VecVT.isFixedLengthVector()) {
8516 ContainerVT = getContainerForFixedLengthVector(VecVT);
8517 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8518 }
8519
8520 // If we're compiling for an exact VLEN value and we have a known
8521 // constant index, we can always perform the extract in m1 (or
8522 // smaller) as we can determine the register corresponding to
8523 // the index in the register group.
8524 const auto VLen = Subtarget.getRealVLen();
8525 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8526 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8527 MVT M1VT = getLMUL1VT(ContainerVT);
8528 unsigned OrigIdx = IdxC->getZExtValue();
8529 EVT ElemVT = VecVT.getVectorElementType();
8530 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8531 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8532 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8533 unsigned ExtractIdx =
8534 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8535 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8536 DAG.getVectorIdxConstant(ExtractIdx, DL));
8537 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8538 ContainerVT = M1VT;
8539 }
8540
8541 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8542 // contains our index.
8543 std::optional<uint64_t> MaxIdx;
8544 if (VecVT.isFixedLengthVector())
8545 MaxIdx = VecVT.getVectorNumElements() - 1;
8546 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8547 MaxIdx = IdxC->getZExtValue();
8548 if (MaxIdx) {
8549 if (auto SmallerVT =
8550 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8551 ContainerVT = *SmallerVT;
8552 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8553 DAG.getConstant(0, DL, XLenVT));
8554 }
8555 }
8556
8557 // If after narrowing, the required slide is still greater than LMUL2,
8558 // fallback to generic expansion and go through the stack. This is done
8559 // for a subtle reason: extracting *all* elements out of a vector is
8560 // widely expected to be linear in vector size, but because vslidedown
8561 // is linear in LMUL, performing N extracts using vslidedown becomes
8562 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8563 // seems to have the same problem (the store is linear in LMUL), but the
8564 // generic expansion *memoizes* the store, and thus for many extracts of
8565 // the same vector we end up with one store and a bunch of loads.
8566 // TODO: We don't have the same code for insert_vector_elt because we
8567 // have BUILD_VECTOR and handle the degenerate case there. Should we
8568 // consider adding an inverse BUILD_VECTOR node?
8569 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8570 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8571 return SDValue();
8572
8573 // If the index is 0, the vector is already in the right position.
8574 if (!isNullConstant(Idx)) {
8575 // Use a VL of 1 to avoid processing more elements than we need.
8576 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8577 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8578 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8579 }
8580
8581 if (!EltVT.isInteger()) {
8582 // Floating-point extracts are handled in TableGen.
8583 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8584 DAG.getVectorIdxConstant(0, DL));
8585 }
8586
8587 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8588 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8589}
8590
8591// Some RVV intrinsics may claim that they want an integer operand to be
8592// promoted or expanded.
8593 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8594 const RISCVSubtarget &Subtarget) {
8595 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8596 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8597 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8598 "Unexpected opcode");
8599
8600 if (!Subtarget.hasVInstructions())
8601 return SDValue();
8602
8603 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8604 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8605 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8606
8607 SDLoc DL(Op);
8608
8609 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8610 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8611 if (!II || !II->hasScalarOperand())
8612 return SDValue();
8613
8614 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8615 assert(SplatOp < Op.getNumOperands());
8616
8617 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8618 SDValue &ScalarOp = Operands[SplatOp];
8619 MVT OpVT = ScalarOp.getSimpleValueType();
8620 MVT XLenVT = Subtarget.getXLenVT();
8621
8622 // If this isn't a scalar, or its type is XLenVT we're done.
8623 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8624 return SDValue();
8625
8626 // Simplest case is that the operand needs to be promoted to XLenVT.
8627 if (OpVT.bitsLT(XLenVT)) {
8628 // If the operand is a constant, sign extend to increase our chances
8629 // of being able to use a .vi instruction. ANY_EXTEND would become a
8630 // zero extend and the simm5 check in isel would fail.
8631 // FIXME: Should we ignore the upper bits in isel instead?
8632 unsigned ExtOpc =
8633 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8634 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8635 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8636 }
8637
8638 // Use the previous operand to get the vXi64 VT. The result might be a mask
8639 // VT for compares. Using the previous operand assumes that the previous
8640 // operand will never have a smaller element size than a scalar operand and
8641 // that a widening operation never uses SEW=64.
8642 // NOTE: If this fails the below assert, we can probably just find the
8643 // element count from any operand or result and use it to construct the VT.
8644 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8645 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8646
8647 // The more complex case is when the scalar is larger than XLenVT.
8648 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8649 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8650
8651 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8652 // instruction to sign-extend since SEW>XLEN.
8653 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8654 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8655 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8656 }
8657
8658 switch (IntNo) {
8659 case Intrinsic::riscv_vslide1up:
8660 case Intrinsic::riscv_vslide1down:
8661 case Intrinsic::riscv_vslide1up_mask:
8662 case Intrinsic::riscv_vslide1down_mask: {
8663 // We need to special case these when the scalar is larger than XLen.
8664 unsigned NumOps = Op.getNumOperands();
8665 bool IsMasked = NumOps == 7;
8666
8667 // Convert the vector source to the equivalent nxvXi32 vector.
8668 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8669 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8670 SDValue ScalarLo, ScalarHi;
8671 std::tie(ScalarLo, ScalarHi) =
8672 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8673
8674 // Double the VL since we halved SEW.
8675 SDValue AVL = getVLOperand(Op);
8676 SDValue I32VL;
8677
8678 // Optimize for constant AVL
8679 if (isa<ConstantSDNode>(AVL)) {
8680 const auto [MinVLMAX, MaxVLMAX] =
8681 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8682
8683 uint64_t AVLInt = AVL->getAsZExtVal();
8684 if (AVLInt <= MinVLMAX) {
8685 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8686 } else if (AVLInt >= 2 * MaxVLMAX) {
8687 // Just set vl to VLMAX in this situation
8688 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
8689 } else {
8690 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8691 // is related to the hardware implementation.
8692 // So let the following code handle it.
8693 }
8694 }
8695 if (!I32VL) {
8696 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8697 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8698 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8699 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8700 SDValue SETVL =
8701 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8702 // Use the vsetvli instruction to get the actually-used length, which is
8703 // related to the hardware implementation.
8704 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8705 SEW, LMUL);
8706 I32VL =
8707 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8708 }
8709
8710 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8711
8712 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8713 // instructions.
8714 SDValue Passthru;
8715 if (IsMasked)
8716 Passthru = DAG.getUNDEF(I32VT);
8717 else
8718 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8719
8720 if (IntNo == Intrinsic::riscv_vslide1up ||
8721 IntNo == Intrinsic::riscv_vslide1up_mask) {
8722 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8723 ScalarHi, I32Mask, I32VL);
8724 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8725 ScalarLo, I32Mask, I32VL);
8726 } else {
8727 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8728 ScalarLo, I32Mask, I32VL);
8729 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8730 ScalarHi, I32Mask, I32VL);
8731 }
8732
8733 // Convert back to nxvXi64.
8734 Vec = DAG.getBitcast(VT, Vec);
8735
8736 if (!IsMasked)
8737 return Vec;
8738 // Apply mask after the operation.
8739 SDValue Mask = Operands[NumOps - 3];
8740 SDValue MaskedOff = Operands[1];
8741 // Assume Policy operand is the last operand.
8742 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8743 // We don't need to select maskedoff if it's undef.
8744 if (MaskedOff.isUndef())
8745 return Vec;
8746 // TAMU
8747 if (Policy == RISCVII::TAIL_AGNOSTIC)
8748 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8749 DAG.getUNDEF(VT), AVL);
8750 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8751 // It's fine because vmerge does not care about mask policy.
8752 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8753 MaskedOff, AVL);
8754 }
8755 }
8756
8757 // We need to convert the scalar to a splat vector.
8758 SDValue VL = getVLOperand(Op);
8759 assert(VL.getValueType() == XLenVT);
8760 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8761 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8762}
8763
8764// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8765// scalable vector llvm.get.vector.length for now.
8766//
8767// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8768// (vscale * VF). The vscale and VF are independent of element width. We use
8769// SEW=8 for the vsetvli because it is the only element width that supports all
8770 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8771// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
8772// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8773// SEW and LMUL are better for the surrounding vector instructions.
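// (E.g. for VF=2: LMul1VF = 64/8 = 8 with SEW=8, so a fractional LMUL of
// mf4 is selected and the emitted vsetvli yields min(AVL, vscale * 2).)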
8774 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8775 const RISCVSubtarget &Subtarget) {
8776 MVT XLenVT = Subtarget.getXLenVT();
8777
8778 // The smallest LMUL is only valid for the smallest element width.
8779 const unsigned ElementWidth = 8;
8780
8781 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8782 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8783 // We don't support VF==1 with ELEN==32.
8784 [[maybe_unused]] unsigned MinVF =
8785 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8786
8787 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8788 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8789 "Unexpected VF");
8790
8791 bool Fractional = VF < LMul1VF;
8792 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8793 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8794 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8795
8796 SDLoc DL(N);
8797
8798 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8799 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8800
8801 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8802
8803 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8804 SDValue Res =
8805 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8806 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8807}
8808
8809 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8810 const RISCVSubtarget &Subtarget) {
8811 SDValue Op0 = N->getOperand(1);
8812 MVT OpVT = Op0.getSimpleValueType();
8813 MVT ContainerVT = OpVT;
8814 if (OpVT.isFixedLengthVector()) {
8815 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8816 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8817 }
8818 MVT XLenVT = Subtarget.getXLenVT();
8819 SDLoc DL(N);
8820 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8821 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8822 if (isOneConstant(N->getOperand(2)))
8823 return Res;
8824
8825 // Convert -1 to VL.
8826 SDValue Setcc =
8827 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8828 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8829 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8830}
8831
8832 static inline void promoteVCIXScalar(const SDValue &Op,
8833 SmallVectorImpl<SDValue> &Operands,
8834 SelectionDAG &DAG) {
8835 const RISCVSubtarget &Subtarget =
8836 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8837
8838 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8839 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8840 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8841 SDLoc DL(Op);
8842
8843 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8844 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8845 if (!II || !II->hasScalarOperand())
8846 return;
8847
8848 unsigned SplatOp = II->ScalarOperand + 1;
8849 assert(SplatOp < Op.getNumOperands());
8850
8851 SDValue &ScalarOp = Operands[SplatOp];
8852 MVT OpVT = ScalarOp.getSimpleValueType();
8853 MVT XLenVT = Subtarget.getXLenVT();
8854
8855 // The code below is partially copied from lowerVectorIntrinsicScalars.
8856 // If this isn't a scalar, or its type is XLenVT we're done.
8857 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8858 return;
8859
8860 // Manually emit promote operation for scalar operation.
8861 if (OpVT.bitsLT(XLenVT)) {
8862 unsigned ExtOpc =
8863 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8864 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8865 }
8866
8867 return;
8868}
8869
8870 static void processVCIXOperands(SDValue &OrigOp,
8871 SmallVectorImpl<SDValue> &Operands,
8872 SelectionDAG &DAG) {
8873 promoteVCIXScalar(OrigOp, Operands, DAG);
8874 const RISCVSubtarget &Subtarget =
8875 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8876 for (SDValue &V : Operands) {
8877 EVT ValType = V.getValueType();
8878 if (ValType.isVector() && ValType.isFloatingPoint()) {
8879 MVT InterimIVT =
8880 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8881 ValType.getVectorElementCount());
8882 V = DAG.getBitcast(InterimIVT, V);
8883 }
8884 if (ValType.isFixedLengthVector()) {
8885 MVT OpContainerVT = getContainerForFixedLengthVector(
8886 DAG, V.getSimpleValueType(), Subtarget);
8887 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8888 }
8889 }
8890}
8891
8892// LMUL * VLEN should be greater than or equal to EGS * SEW
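// (E.g. a Zvk operand with EGS=4 and SEW=32 needs VLEN >= 128 at LMUL=1, or
// VLEN >= 256 at LMUL=1/2.)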
8893static inline bool isValidEGW(int EGS, EVT VT,
8894 const RISCVSubtarget &Subtarget) {
8895 return (Subtarget.getRealMinVLen() *
8896 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8897 EGS * VT.getScalarSizeInBits();
8898}
8899
8900SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8901 SelectionDAG &DAG) const {
8902 unsigned IntNo = Op.getConstantOperandVal(0);
8903 SDLoc DL(Op);
8904 MVT XLenVT = Subtarget.getXLenVT();
8905
8906 switch (IntNo) {
8907 default:
8908 break; // Don't custom lower most intrinsics.
8909 case Intrinsic::thread_pointer: {
8910 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8911 return DAG.getRegister(RISCV::X4, PtrVT);
8912 }
8913 case Intrinsic::riscv_orc_b:
8914 case Intrinsic::riscv_brev8:
8915 case Intrinsic::riscv_sha256sig0:
8916 case Intrinsic::riscv_sha256sig1:
8917 case Intrinsic::riscv_sha256sum0:
8918 case Intrinsic::riscv_sha256sum1:
8919 case Intrinsic::riscv_sm3p0:
8920 case Intrinsic::riscv_sm3p1: {
8921 unsigned Opc;
8922 switch (IntNo) {
8923 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8924 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8925 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8926 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8927 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8928 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8929 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8930 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8931 }
8932
8933 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8934 }
8935 case Intrinsic::riscv_sm4ks:
8936 case Intrinsic::riscv_sm4ed: {
8937 unsigned Opc =
8938 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8939
8940 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8941 Op.getOperand(3));
8942 }
8943 case Intrinsic::riscv_zip:
8944 case Intrinsic::riscv_unzip: {
8945 unsigned Opc =
8946 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8947 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8948 }
8949 case Intrinsic::riscv_mopr:
8950 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8951 Op.getOperand(2));
8952
8953 case Intrinsic::riscv_moprr: {
8954 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8955 Op.getOperand(2), Op.getOperand(3));
8956 }
8957 case Intrinsic::riscv_clmul:
8958 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8959 Op.getOperand(2));
8960 case Intrinsic::riscv_clmulh:
8961 case Intrinsic::riscv_clmulr: {
8962 unsigned Opc =
8963 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8964 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8965 }
8966 case Intrinsic::experimental_get_vector_length:
8967 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8968 case Intrinsic::experimental_cttz_elts:
8969 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
8970 case Intrinsic::riscv_vmv_x_s: {
8971 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8972 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8973 }
8974 case Intrinsic::riscv_vfmv_f_s:
8975 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8976 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
8977 case Intrinsic::riscv_vmv_v_x:
8978 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8979 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8980 Subtarget);
8981 case Intrinsic::riscv_vfmv_v_f:
8982 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8983 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8984 case Intrinsic::riscv_vmv_s_x: {
8985 SDValue Scalar = Op.getOperand(2);
8986
8987 if (Scalar.getValueType().bitsLE(XLenVT)) {
8988 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8989 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8990 Op.getOperand(1), Scalar, Op.getOperand(3));
8991 }
8992
8993 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8994
8995 // This is an i64 value that lives in two scalar registers. We have to
8996 // insert this in a convoluted way. First we build a vXi64 splat containing
8997 // the two values that we assemble using some bit math. Next we'll use
8998 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8999 // to merge element 0 from our splat into the source vector.
9000 // FIXME: This is probably not the best way to do this, but it is
9001 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9002 // point.
9003 // sw lo, (a0)
9004 // sw hi, 4(a0)
9005 // vlse vX, (a0)
9006 //
9007 // vid.v vVid
9008 // vmseq.vx mMask, vVid, 0
9009 // vmerge.vvm vDest, vSrc, vVal, mMask
9010 MVT VT = Op.getSimpleValueType();
9011 SDValue Vec = Op.getOperand(1);
9012 SDValue VL = getVLOperand(Op);
9013
9014 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9015 if (Op.getOperand(1).isUndef())
9016 return SplattedVal;
9017 SDValue SplattedIdx =
9018 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9019 DAG.getConstant(0, DL, MVT::i32), VL);
9020
9021 MVT MaskVT = getMaskTypeFor(VT);
9022 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9023 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9024 SDValue SelectCond =
9025 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9026 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9027 DAG.getUNDEF(MaskVT), Mask, VL});
9028 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9029 Vec, DAG.getUNDEF(VT), VL);
9030 }
9031 case Intrinsic::riscv_vfmv_s_f:
9032 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9033 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9034 // EGS * EEW >= 128 bits
9035 case Intrinsic::riscv_vaesdf_vv:
9036 case Intrinsic::riscv_vaesdf_vs:
9037 case Intrinsic::riscv_vaesdm_vv:
9038 case Intrinsic::riscv_vaesdm_vs:
9039 case Intrinsic::riscv_vaesef_vv:
9040 case Intrinsic::riscv_vaesef_vs:
9041 case Intrinsic::riscv_vaesem_vv:
9042 case Intrinsic::riscv_vaesem_vs:
9043 case Intrinsic::riscv_vaeskf1:
9044 case Intrinsic::riscv_vaeskf2:
9045 case Intrinsic::riscv_vaesz_vs:
9046 case Intrinsic::riscv_vsm4k:
9047 case Intrinsic::riscv_vsm4r_vv:
9048 case Intrinsic::riscv_vsm4r_vs: {
9049 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9050 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9051 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9052 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9053 return Op;
9054 }
9055 // EGS * EEW >= 256 bits
9056 case Intrinsic::riscv_vsm3c:
9057 case Intrinsic::riscv_vsm3me: {
9058 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9059 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9060 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9061 return Op;
9062 }
9063 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9064 case Intrinsic::riscv_vsha2ch:
9065 case Intrinsic::riscv_vsha2cl:
9066 case Intrinsic::riscv_vsha2ms: {
9067 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9068 !Subtarget.hasStdExtZvknhb())
9069 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9070 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9071 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9072 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9073 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9074 return Op;
9075 }
9076 case Intrinsic::riscv_sf_vc_v_x:
9077 case Intrinsic::riscv_sf_vc_v_i:
9078 case Intrinsic::riscv_sf_vc_v_xv:
9079 case Intrinsic::riscv_sf_vc_v_iv:
9080 case Intrinsic::riscv_sf_vc_v_vv:
9081 case Intrinsic::riscv_sf_vc_v_fv:
9082 case Intrinsic::riscv_sf_vc_v_xvv:
9083 case Intrinsic::riscv_sf_vc_v_ivv:
9084 case Intrinsic::riscv_sf_vc_v_vvv:
9085 case Intrinsic::riscv_sf_vc_v_fvv:
9086 case Intrinsic::riscv_sf_vc_v_xvw:
9087 case Intrinsic::riscv_sf_vc_v_ivw:
9088 case Intrinsic::riscv_sf_vc_v_vvw:
9089 case Intrinsic::riscv_sf_vc_v_fvw: {
9090 MVT VT = Op.getSimpleValueType();
9091
9092 SmallVector<SDValue> Operands{Op->op_values()};
9093 processVCIXOperands(Op, Operands, DAG);
9094
9095 MVT RetVT = VT;
9096 if (VT.isFixedLengthVector())
9097 RetVT = getContainerForFixedLengthVector(VT);
9098 else if (VT.isFloatingPoint())
9099 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9100 VT.getVectorElementCount());
9101
9102 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9103
9104 if (VT.isFixedLengthVector())
9105 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9106 else if (VT.isFloatingPoint())
9107 NewNode = DAG.getBitcast(VT, NewNode);
9108
9109 if (Op == NewNode)
9110 break;
9111
9112 return NewNode;
9113 }
9114 }
9115
9116 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9117}
9118
9119 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9120 unsigned Type) {
9121 SDLoc DL(Op);
9122 SmallVector<SDValue> Operands{Op->op_values()};
9123 Operands.erase(Operands.begin() + 1);
9124
9125 const RISCVSubtarget &Subtarget =
9126 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9127 MVT VT = Op.getSimpleValueType();
9128 MVT RetVT = VT;
9129 MVT FloatVT = VT;
9130
9131 if (VT.isFloatingPoint()) {
9132 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9133 VT.getVectorElementCount());
9134 FloatVT = RetVT;
9135 }
9136 if (VT.isFixedLengthVector())
9137 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9138 Subtarget);
9139
9140 processVCIXOperands(Op, Operands, DAG);
9141
9142 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9143 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9144 SDValue Chain = NewNode.getValue(1);
9145
9146 if (VT.isFixedLengthVector())
9147 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9148 if (VT.isFloatingPoint())
9149 NewNode = DAG.getBitcast(VT, NewNode);
9150
9151 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9152
9153 return NewNode;
9154}
9155
9156 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9157 unsigned Type) {
9158 SmallVector<SDValue> Operands{Op->op_values()};
9159 Operands.erase(Operands.begin() + 1);
9160 processVCIXOperands(Op, Operands, DAG);
9161
9162 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9163}
9164
9165SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9166 SelectionDAG &DAG) const {
9167 unsigned IntNo = Op.getConstantOperandVal(1);
9168 switch (IntNo) {
9169 default:
9170 break;
9171 case Intrinsic::riscv_seg2_load:
9172 case Intrinsic::riscv_seg3_load:
9173 case Intrinsic::riscv_seg4_load:
9174 case Intrinsic::riscv_seg5_load:
9175 case Intrinsic::riscv_seg6_load:
9176 case Intrinsic::riscv_seg7_load:
9177 case Intrinsic::riscv_seg8_load: {
9178 SDLoc DL(Op);
9179 static const Intrinsic::ID VlsegInts[7] = {
9180 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9181 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9182 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9183 Intrinsic::riscv_vlseg8};
9184 unsigned NF = Op->getNumValues() - 1;
9185 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9186 MVT XLenVT = Subtarget.getXLenVT();
9187 MVT VT = Op->getSimpleValueType(0);
9188 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9189
9190 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9191 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9192 auto *Load = cast<MemIntrinsicSDNode>(Op);
9193 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9194 ContainerVTs.push_back(MVT::Other);
9195 SDVTList VTs = DAG.getVTList(ContainerVTs);
9196 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9197 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9198 Ops.push_back(Op.getOperand(2));
9199 Ops.push_back(VL);
9200 SDValue Result =
9201 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9202 Load->getMemoryVT(), Load->getMemOperand());
9203 SmallVector<SDValue, 9> Results;
9204 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9205 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9206 DAG, Subtarget));
9207 Results.push_back(Result.getValue(NF));
9208 return DAG.getMergeValues(Results, DL);
9209 }
9210 case Intrinsic::riscv_sf_vc_v_x_se:
9211 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9212 case Intrinsic::riscv_sf_vc_v_i_se:
9213 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9214 case Intrinsic::riscv_sf_vc_v_xv_se:
9215 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9216 case Intrinsic::riscv_sf_vc_v_iv_se:
9217 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9218 case Intrinsic::riscv_sf_vc_v_vv_se:
9219 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9220 case Intrinsic::riscv_sf_vc_v_fv_se:
9221 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9222 case Intrinsic::riscv_sf_vc_v_xvv_se:
9223 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9224 case Intrinsic::riscv_sf_vc_v_ivv_se:
9225 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9226 case Intrinsic::riscv_sf_vc_v_vvv_se:
9227 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9228 case Intrinsic::riscv_sf_vc_v_fvv_se:
9229 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9230 case Intrinsic::riscv_sf_vc_v_xvw_se:
9231 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9232 case Intrinsic::riscv_sf_vc_v_ivw_se:
9233 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9234 case Intrinsic::riscv_sf_vc_v_vvw_se:
9235 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9236 case Intrinsic::riscv_sf_vc_v_fvw_se:
9237 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9238 }
9239
9240 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9241}
9242
9243SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9244 SelectionDAG &DAG) const {
9245 unsigned IntNo = Op.getConstantOperandVal(1);
9246 switch (IntNo) {
9247 default:
9248 break;
9249 case Intrinsic::riscv_seg2_store:
9250 case Intrinsic::riscv_seg3_store:
9251 case Intrinsic::riscv_seg4_store:
9252 case Intrinsic::riscv_seg5_store:
9253 case Intrinsic::riscv_seg6_store:
9254 case Intrinsic::riscv_seg7_store:
9255 case Intrinsic::riscv_seg8_store: {
9256 SDLoc DL(Op);
9257 static const Intrinsic::ID VssegInts[] = {
9258 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9259 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9260 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9261 Intrinsic::riscv_vsseg8};
9262 // Operands are (chain, int_id, vec*, ptr, vl)
9263 unsigned NF = Op->getNumOperands() - 4;
9264 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9265 MVT XLenVT = Subtarget.getXLenVT();
9266 MVT VT = Op->getOperand(2).getSimpleValueType();
9267 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9268
9269 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9270 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9271 SDValue Ptr = Op->getOperand(NF + 2);
9272
9273 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9274 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9275 for (unsigned i = 0; i < NF; i++)
9276 Ops.push_back(convertToScalableVector(
9277 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9278 Ops.append({Ptr, VL});
9279
9280 return DAG.getMemIntrinsicNode(
9281 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9282 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9283 }
9284 case Intrinsic::riscv_sf_vc_xv_se:
9285 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9286 case Intrinsic::riscv_sf_vc_iv_se:
9287 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9288 case Intrinsic::riscv_sf_vc_vv_se:
9289 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9290 case Intrinsic::riscv_sf_vc_fv_se:
9291 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9292 case Intrinsic::riscv_sf_vc_xvv_se:
9293 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9294 case Intrinsic::riscv_sf_vc_ivv_se:
9295 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9296 case Intrinsic::riscv_sf_vc_vvv_se:
9297 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9298 case Intrinsic::riscv_sf_vc_fvv_se:
9299 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9300 case Intrinsic::riscv_sf_vc_xvw_se:
9301 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9302 case Intrinsic::riscv_sf_vc_ivw_se:
9303 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9304 case Intrinsic::riscv_sf_vc_vvw_se:
9305 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9306 case Intrinsic::riscv_sf_vc_fvw_se:
9307 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9308 }
9309
9310 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9311}
9312
9313static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9314 switch (ISDOpcode) {
9315 default:
9316 llvm_unreachable("Unhandled reduction");
9317 case ISD::VP_REDUCE_ADD:
9318 case ISD::VECREDUCE_ADD:
9319 return RISCVISD::VECREDUCE_ADD_VL;
9320 case ISD::VP_REDUCE_UMAX:
9321 case ISD::VECREDUCE_UMAX:
9322 return RISCVISD::VECREDUCE_UMAX_VL;
9323 case ISD::VP_REDUCE_SMAX:
9324 case ISD::VECREDUCE_SMAX:
9325 return RISCVISD::VECREDUCE_SMAX_VL;
9326 case ISD::VP_REDUCE_UMIN:
9327 case ISD::VECREDUCE_UMIN:
9328 return RISCVISD::VECREDUCE_UMIN_VL;
9329 case ISD::VP_REDUCE_SMIN:
9330 case ISD::VECREDUCE_SMIN:
9331 return RISCVISD::VECREDUCE_SMIN_VL;
9332 case ISD::VP_REDUCE_AND:
9333 case ISD::VECREDUCE_AND:
9334 return RISCVISD::VECREDUCE_AND_VL;
9335 case ISD::VP_REDUCE_OR:
9336 case ISD::VECREDUCE_OR:
9337 return RISCVISD::VECREDUCE_OR_VL;
9338 case ISD::VP_REDUCE_XOR:
9339 case ISD::VECREDUCE_XOR:
9340 return RISCVISD::VECREDUCE_XOR_VL;
9341 case ISD::VP_REDUCE_FADD:
9342 return RISCVISD::VECREDUCE_FADD_VL;
9343 case ISD::VP_REDUCE_SEQ_FADD:
9344 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9345 case ISD::VP_REDUCE_FMAX:
9346 case ISD::VP_REDUCE_FMAXIMUM:
9347 return RISCVISD::VECREDUCE_FMAX_VL;
9348 case ISD::VP_REDUCE_FMIN:
9349 case ISD::VP_REDUCE_FMINIMUM:
9350 return RISCVISD::VECREDUCE_FMIN_VL;
9351 }
9352
9353 }
9354
9355SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9356 SelectionDAG &DAG,
9357 bool IsVP) const {
9358 SDLoc DL(Op);
9359 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9360 MVT VecVT = Vec.getSimpleValueType();
9361 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9362 Op.getOpcode() == ISD::VECREDUCE_OR ||
9363 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9364 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9365 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9366 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9367 "Unexpected reduction lowering");
9368
9369 MVT XLenVT = Subtarget.getXLenVT();
9370
9371 MVT ContainerVT = VecVT;
9372 if (VecVT.isFixedLengthVector()) {
9373 ContainerVT = getContainerForFixedLengthVector(VecVT);
9374 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9375 }
9376
9377 SDValue Mask, VL;
9378 if (IsVP) {
9379 Mask = Op.getOperand(2);
9380 VL = Op.getOperand(3);
9381 } else {
9382 std::tie(Mask, VL) =
9383 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9384 }
9385
9386 unsigned BaseOpc;
9387 ISD::CondCode CC;
9388 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9389
9390 switch (Op.getOpcode()) {
9391 default:
9392 llvm_unreachable("Unhandled reduction");
9393 case ISD::VECREDUCE_AND:
9394 case ISD::VP_REDUCE_AND: {
9395 // vcpop ~x == 0
9396 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9397 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9398 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9399 CC = ISD::SETEQ;
9400 BaseOpc = ISD::AND;
9401 break;
9402 }
9403 case ISD::VECREDUCE_OR:
9404 case ISD::VP_REDUCE_OR:
9405 // vcpop x != 0
9406 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9407 CC = ISD::SETNE;
9408 BaseOpc = ISD::OR;
9409 break;
9410 case ISD::VECREDUCE_XOR:
9411 case ISD::VP_REDUCE_XOR: {
9412 // ((vcpop x) & 1) != 0
9413 SDValue One = DAG.getConstant(1, DL, XLenVT);
9414 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9415 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9416 CC = ISD::SETNE;
9417 BaseOpc = ISD::XOR;
9418 break;
9419 }
9420 }
9421
9422 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9423 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9424
9425 if (!IsVP)
9426 return SetCC;
9427
9428 // Now include the start value in the operation.
9429 // Note that we must return the start value when no elements are operated
9430 // upon. The vcpop instructions we've emitted in each case above will return
9431 // 0 for an inactive vector, and so we've already received the neutral value:
9432 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9433 // can simply include the start value.
9434 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9435}
9436
9437static bool isNonZeroAVL(SDValue AVL) {
9438 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9439 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9440 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9441 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9442}
9443
9444/// Helper to lower a reduction sequence of the form:
9445/// scalar = reduce_op vec, scalar_start
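/// The start value is placed in element 0 of an LMUL1 (or smaller) vector,
/// the RVV reduction folds the input into that element, and the scalar
/// result is read back out of element 0.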
9446static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9447 SDValue StartValue, SDValue Vec, SDValue Mask,
9448 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9449 const RISCVSubtarget &Subtarget) {
9450 const MVT VecVT = Vec.getSimpleValueType();
9451 const MVT M1VT = getLMUL1VT(VecVT);
9452 const MVT XLenVT = Subtarget.getXLenVT();
9453 const bool NonZeroAVL = isNonZeroAVL(VL);
9454
9455 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9456 // or the original VT if fractional.
9457 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9458 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9459 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9460 // be the result of the reduction operation.
9461 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9462 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9463 DAG, Subtarget);
9464 if (M1VT != InnerVT)
9465 InitialValue =
9466 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9467 InitialValue, DAG.getVectorIdxConstant(0, DL));
9468 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9469 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9470 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9471 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9472 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9473 DAG.getVectorIdxConstant(0, DL));
9474}
9475
9476SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9477 SelectionDAG &DAG) const {
9478 SDLoc DL(Op);
9479 SDValue Vec = Op.getOperand(0);
9480 EVT VecEVT = Vec.getValueType();
9481
9482 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9483
9484 // Due to ordering in legalize types we may have a vector type that needs to
9485 // be split. Do that manually so we can get down to a legal type.
9486 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9488 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9489 VecEVT = Lo.getValueType();
9490 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9491 }
9492
9493 // TODO: The type may need to be widened rather than split. Or widened before
9494 // it can be split.
9495 if (!isTypeLegal(VecEVT))
9496 return SDValue();
9497
9498 MVT VecVT = VecEVT.getSimpleVT();
9499 MVT VecEltVT = VecVT.getVectorElementType();
9500 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9501
9502 MVT ContainerVT = VecVT;
9503 if (VecVT.isFixedLengthVector()) {
9504 ContainerVT = getContainerForFixedLengthVector(VecVT);
9505 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9506 }
9507
9508 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9509
9510 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9511 switch (BaseOpc) {
9512 case ISD::AND:
9513 case ISD::OR:
9514 case ISD::UMAX:
9515 case ISD::UMIN:
9516 case ISD::SMAX:
9517 case ISD::SMIN:
9518 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9519 DAG.getVectorIdxConstant(0, DL));
9520 }
9521 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9522 Mask, VL, DL, DAG, Subtarget);
9523}
9524
9525// Given a reduction op, this function returns the matching reduction opcode,
9526// the vector SDValue and the scalar SDValue required to lower this to a
9527// RISCVISD node.
9528static std::tuple<unsigned, SDValue, SDValue>
9529 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9530 const RISCVSubtarget &Subtarget) {
9531 SDLoc DL(Op);
9532 auto Flags = Op->getFlags();
9533 unsigned Opcode = Op.getOpcode();
9534 switch (Opcode) {
9535 default:
9536 llvm_unreachable("Unhandled reduction");
9537 case ISD::VECREDUCE_FADD: {
9538 // Use positive zero if we can. It is cheaper to materialize.
9539 SDValue Zero =
9540 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9541 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9542 }
9543 case ISD::VECREDUCE_SEQ_FADD:
9544 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9545 Op.getOperand(0));
9546 case ISD::VECREDUCE_FMINIMUM:
9547 case ISD::VECREDUCE_FMAXIMUM:
9548 case ISD::VECREDUCE_FMIN:
9549 case ISD::VECREDUCE_FMAX: {
9550 SDValue Front =
9551 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9552 DAG.getVectorIdxConstant(0, DL));
9553 unsigned RVVOpc =
9554 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9555 ? RISCVISD::VECREDUCE_FMIN_VL
9556 : RISCVISD::VECREDUCE_FMAX_VL;
9557 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9558 }
9559 }
9560}
9561
9562SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9563 SelectionDAG &DAG) const {
9564 SDLoc DL(Op);
9565 MVT VecEltVT = Op.getSimpleValueType();
9566
9567 unsigned RVVOpcode;
9568 SDValue VectorVal, ScalarVal;
9569 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9570 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9571 MVT VecVT = VectorVal.getSimpleValueType();
9572
9573 MVT ContainerVT = VecVT;
9574 if (VecVT.isFixedLengthVector()) {
9575 ContainerVT = getContainerForFixedLengthVector(VecVT);
9576 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9577 }
9578
9579 MVT ResVT = Op.getSimpleValueType();
9580 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9581 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9582 VL, DL, DAG, Subtarget);
9583 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9584 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9585 return Res;
9586
9587 if (Op->getFlags().hasNoNaNs())
9588 return Res;
9589
9590 // Force output to NaN if any element is Nan.
9591 SDValue IsNan =
9592 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9593 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9594 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9595 MVT XLenVT = Subtarget.getXLenVT();
9596 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9597 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9598 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9599 return DAG.getSelect(
9600 DL, ResVT, NoNaNs, Res,
9601 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL,
9602 ResVT));
9603}
9604
9605SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9606 SelectionDAG &DAG) const {
9607 SDLoc DL(Op);
9608 unsigned Opc = Op.getOpcode();
9609 SDValue Start = Op.getOperand(0);
9610 SDValue Vec = Op.getOperand(1);
9611 EVT VecEVT = Vec.getValueType();
9612 MVT XLenVT = Subtarget.getXLenVT();
9613
9614 // TODO: The type may need to be widened rather than split. Or widened before
9615 // it can be split.
9616 if (!isTypeLegal(VecEVT))
9617 return SDValue();
9618
9619 MVT VecVT = VecEVT.getSimpleVT();
9620 unsigned RVVOpcode = getRVVReductionOp(Opc);
9621
9622 if (VecVT.isFixedLengthVector()) {
9623 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9624 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9625 }
9626
9627 SDValue VL = Op.getOperand(3);
9628 SDValue Mask = Op.getOperand(2);
9629 SDValue Res =
9630 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9631 Vec, Mask, VL, DL, DAG, Subtarget);
9632 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9633 Op->getFlags().hasNoNaNs())
9634 return Res;
9635
9636 // Propagate NaNs.
9637 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9638 // Check if any of the elements in Vec is NaN.
9639 SDValue IsNaN = DAG.getNode(
9640 RISCVISD::SETCC_VL, DL, PredVT,
9641 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9642 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9643 // Check if the start value is NaN.
9644 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9645 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
9646 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
9647 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9648 MVT ResVT = Res.getSimpleValueType();
9649 return DAG.getSelect(
9650 DL, ResVT, NoNaNs, Res,
9651 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL,
9652 ResVT));
9653}
9654
9655SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9656 SelectionDAG &DAG) const {
9657 SDValue Vec = Op.getOperand(0);
9658 SDValue SubVec = Op.getOperand(1);
9659 MVT VecVT = Vec.getSimpleValueType();
9660 MVT SubVecVT = SubVec.getSimpleValueType();
9661
9662 SDLoc DL(Op);
9663 MVT XLenVT = Subtarget.getXLenVT();
9664 unsigned OrigIdx = Op.getConstantOperandVal(2);
9665 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9666
9667 // We don't have the ability to slide mask vectors up indexed by their i1
9668 // elements; the smallest we can do is i8. Often we are able to bitcast to
9669 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9670 // into a scalable one, we might not necessarily have enough scalable
9671 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9672 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9673 (OrigIdx != 0 || !Vec.isUndef())) {
9674 if (VecVT.getVectorMinNumElements() >= 8 &&
9675 SubVecVT.getVectorMinNumElements() >= 8) {
9676 assert(OrigIdx % 8 == 0 && "Invalid index");
9677 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9678 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9679 "Unexpected mask vector lowering");
9680 OrigIdx /= 8;
9681 SubVecVT =
9682 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9683 SubVecVT.isScalableVector());
9684 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9685 VecVT.isScalableVector());
9686 Vec = DAG.getBitcast(VecVT, Vec);
9687 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9688 } else {
9689 // We can't slide this mask vector up indexed by its i1 elements.
9690 // This poses a problem when we wish to insert a scalable vector which
9691 // can't be re-expressed as a larger type. Just choose the slow path and
9692 // extend to a larger type, then truncate back down.
9693 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9694 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9695 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9696 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9697 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9698 Op.getOperand(2));
9699 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9700 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9701 }
9702 }
9703
9704 // If the subvector is a fixed-length type and we don't know VLEN
9705 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9706 // don't know which register of a LMUL group contains the specific subvector
9707 // as we only know the minimum register size. Therefore we must slide the
9708 // vector group up the full amount.
9709 const auto VLen = Subtarget.getRealVLen();
9710 if (SubVecVT.isFixedLengthVector() && !VLen) {
9711 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9712 return Op;
9713 MVT ContainerVT = VecVT;
9714 if (VecVT.isFixedLengthVector()) {
9715 ContainerVT = getContainerForFixedLengthVector(VecVT);
9716 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9717 }
9718
9719 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9720 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9721 DAG.getUNDEF(ContainerVT), SubVec,
9722 DAG.getVectorIdxConstant(0, DL));
9723 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9724 return DAG.getBitcast(Op.getValueType(), SubVec);
9725 }
9726
9727 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9728 DAG.getUNDEF(ContainerVT), SubVec,
9729 DAG.getVectorIdxConstant(0, DL));
9730 SDValue Mask =
9731 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9732 // Set the vector length to only the number of elements we care about. Note
9733 // that for slideup this includes the offset.
9734 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9735 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
9736
9737 // Use tail agnostic policy if we're inserting over Vec's tail.
9738 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9739 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9740 Policy = RISCVII::TAIL_AGNOSTIC;
9741
9742 // If we're inserting into the lowest elements, use a tail undisturbed
9743 // vmv.v.v.
9744 if (OrigIdx == 0) {
9745 SubVec =
9746 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9747 } else {
9748 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9749 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9750 SlideupAmt, Mask, VL, Policy);
9751 }
9752
9753 if (VecVT.isFixedLengthVector())
9754 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9755 return DAG.getBitcast(Op.getValueType(), SubVec);
9756 }
9757
9758 MVT ContainerVecVT = VecVT;
9759 if (VecVT.isFixedLengthVector()) {
9760 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
9761 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
9762 }
9763
9764 MVT ContainerSubVecVT = SubVecVT;
9765 if (SubVecVT.isFixedLengthVector()) {
9766 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9767 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
9768 }
9769
9770 unsigned SubRegIdx;
9771 ElementCount RemIdx;
9772 // insert_subvector scales the index by vscale if the subvector is scalable,
9773 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9774 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
9775 if (SubVecVT.isFixedLengthVector()) {
9776 assert(VLen);
9777 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9778 auto Decompose =
9779 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9780 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9781 SubRegIdx = Decompose.first;
9782 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9783 (OrigIdx % Vscale));
9784 } else {
9785 auto Decompose =
9786 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9787 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
9788 SubRegIdx = Decompose.first;
9789 RemIdx = ElementCount::getScalable(Decompose.second);
9790 }
9791
9792 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9793 assert(isPowerOf2_64(
9794 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
9795 bool ExactlyVecRegSized =
9796 Subtarget.expandVScale(SubVecVT.getSizeInBits())
9797 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
9798
9799 // 1. If the Idx has been completely eliminated and this subvector's size is
9800 // a vector register or a multiple thereof, or the surrounding elements are
9801 // undef, then this is a subvector insert which naturally aligns to a vector
9802 // register. These can easily be handled using subregister manipulation.
9803 // 2. If the subvector isn't an exact multiple of a valid register group size,
9804 // then the insertion must preserve the undisturbed elements of the register.
9805 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9806 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
9807 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
9808 // of that LMUL=1 type back into the larger vector (resolving to another
9809 // subregister operation). See below for how our VSLIDEUP works. We go via a
9810 // LMUL=1 type to avoid allocating a large register group to hold our
9811 // subvector.
9812 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
9813 if (SubVecVT.isFixedLengthVector()) {
9814 // We may get NoSubRegister if inserting at index 0 and the subvec
9815 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
9816 if (SubRegIdx == RISCV::NoSubRegister) {
9817 assert(OrigIdx == 0);
9818 return Op;
9819 }
9820
9821 SDValue Insert =
9822 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
9823 if (VecVT.isFixedLengthVector())
9824 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
9825 return Insert;
9826 }
9827 return Op;
9828 }
9829
9830 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9831 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
9832 // (in our case undisturbed). This means we can set up a subvector insertion
9833 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9834 // size of the subvector.
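// For example, inserting a 4-element subvector at offset 2 uses OFFSET=2 and
// VL=2+4=6: elements [0,2) keep their previous values, [2,6) receive the
// subvector, and [6,VLMAX) follow the (undisturbed) tail policy.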
9835 MVT InterSubVT = ContainerVecVT;
9836 SDValue AlignedExtract = Vec;
9837 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
9838 if (SubVecVT.isFixedLengthVector())
9839 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
9840 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
9841 InterSubVT = getLMUL1VT(ContainerVecVT);
9842 // Extract a subvector equal to the nearest full vector register type. This
9843 // should resolve to a EXTRACT_SUBREG instruction.
9844 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9845 DAG.getVectorIdxConstant(AlignedIdx, DL));
9846 }
9847
9848 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9849 DAG.getUNDEF(InterSubVT), SubVec,
9850 DAG.getVectorIdxConstant(0, DL));
9851
9852 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
9853
9854 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
9855 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
9856
9857 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9858 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9859 if (Subtarget.expandVScale(EndIndex) ==
9860 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
9861 Policy = RISCVII::TAIL_AGNOSTIC;
9862
9863 // If we're inserting into the lowest elements, use a tail undisturbed
9864 // vmv.v.v.
9865 if (RemIdx.isZero()) {
9866 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9867 SubVec, VL);
9868 } else {
9869 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
9870
9871 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9872 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9873
9874 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9875 SlideupAmt, Mask, VL, Policy);
9876 }
9877
9878 // If required, insert this subvector back into the correct vector register.
9879 // This should resolve to an INSERT_SUBREG instruction.
9880 if (ContainerVecVT.bitsGT(InterSubVT))
9881 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
9882 DAG.getVectorIdxConstant(AlignedIdx, DL));
9883
9884 if (VecVT.isFixedLengthVector())
9885 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9886
9887 // We might have bitcast from a mask type: cast back to the original type if
9888 // required.
9889 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9890}
9891
9892SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9893 SelectionDAG &DAG) const {
9894 SDValue Vec = Op.getOperand(0);
9895 MVT SubVecVT = Op.getSimpleValueType();
9896 MVT VecVT = Vec.getSimpleValueType();
9897
9898 SDLoc DL(Op);
9899 MVT XLenVT = Subtarget.getXLenVT();
9900 unsigned OrigIdx = Op.getConstantOperandVal(1);
9901 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9902
9903 // We don't have the ability to slide mask vectors down indexed by their i1
9904 // elements; the smallest we can do is i8. Often we are able to bitcast to
9905 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9906 // from a scalable one, we might not necessarily have enough scalable
9907 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
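// For example, v8i1 = extract v32i1 at index 16 becomes
// v1i8 = extract v4i8 at index 2 once the source vector is bitcast to i8.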
9908 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9909 if (VecVT.getVectorMinNumElements() >= 8 &&
9910 SubVecVT.getVectorMinNumElements() >= 8) {
9911 assert(OrigIdx % 8 == 0 && "Invalid index");
9912 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9913 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9914 "Unexpected mask vector lowering");
9915 OrigIdx /= 8;
9916 SubVecVT =
9917 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9918 SubVecVT.isScalableVector());
9919 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9920 VecVT.isScalableVector());
9921 Vec = DAG.getBitcast(VecVT, Vec);
9922 } else {
9923 // We can't slide this mask vector down, indexed by its i1 elements.
9924 // This poses a problem when we wish to extract a scalable vector which
9925 // can't be re-expressed as a larger type. Just choose the slow path and
9926 // extend to a larger type, then truncate back down.
9927 // TODO: We could probably improve this when extracting certain fixed-length
9928 // vectors from fixed-length vectors, where we can extract as i8 and shift the
9929 // correct element right to reach the desired subvector.
9930 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9931 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9932 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9933 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9934 Op.getOperand(1));
9935 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9936 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9937 }
9938 }
9939
9940 // With an index of 0 this is a cast-like subvector extract, which can be
9941 // performed with subregister operations.
9942 if (OrigIdx == 0)
9943 return Op;
9944
9945 const auto VLen = Subtarget.getRealVLen();
9946
9947 // If the subvector is a fixed-length type and we don't know VLEN
9948 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9949 // don't know which register of an LMUL group contains the specific subvector
9950 // as we only know the minimum register size. Therefore we must slide the
9951 // vector group down the full amount.
9952 if (SubVecVT.isFixedLengthVector() && !VLen) {
9953 MVT ContainerVT = VecVT;
9954 if (VecVT.isFixedLengthVector()) {
9955 ContainerVT = getContainerForFixedLengthVector(VecVT);
9956 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9957 }
9958
9959 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9960 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9961 if (auto ShrunkVT =
9962 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9963 ContainerVT = *ShrunkVT;
9964 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9965 DAG.getVectorIdxConstant(0, DL));
9966 }
9967
9968 SDValue Mask =
9969 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9970 // Set the vector length to only the number of elements we care about. This
9971 // avoids sliding down elements we're going to discard straight away.
9972 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
9973 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9974 SDValue Slidedown =
9975 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9976 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9977 // Now we can use a cast-like subvector extract to get the result.
9978 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9979 DAG.getVectorIdxConstant(0, DL));
9980 return DAG.getBitcast(Op.getValueType(), Slidedown);
9981 }
9982
9983 if (VecVT.isFixedLengthVector()) {
9984 VecVT = getContainerForFixedLengthVector(VecVT);
9985 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
9986 }
9987
9988 MVT ContainerSubVecVT = SubVecVT;
9989 if (SubVecVT.isFixedLengthVector())
9990 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9991
9992 unsigned SubRegIdx;
9993 ElementCount RemIdx;
9994 // extract_subvector scales the index by vscale if the subvector is scalable,
9995 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9996 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
9997 if (SubVecVT.isFixedLengthVector()) {
9998 assert(VLen);
9999 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10000 auto Decompose =
10001 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10002 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10003 SubRegIdx = Decompose.first;
10004 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10005 (OrigIdx % Vscale));
10006 } else {
10007 auto Decompose =
10008 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10009 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10010 SubRegIdx = Decompose.first;
10011 RemIdx = ElementCount::getScalable(Decompose.second);
10012 }
10013
10014 // If the Idx has been completely eliminated then this is a subvector extract
10015 // which naturally aligns to a vector register. These can easily be handled
10016 // using subregister manipulation.
10017 if (RemIdx.isZero()) {
10018 if (SubVecVT.isFixedLengthVector()) {
10019 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10020 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10021 }
10022 return Op;
10023 }
10024
10025 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10026 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10027 // divide exactly.
10028 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10029 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10030
10031 // If the vector type is an LMUL-group type, extract a subvector equal to the
10032 // nearest full vector register type.
10033 MVT InterSubVT = VecVT;
10034 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10035 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10036 // we should have successfully decomposed the extract into a subregister.
10037 assert(SubRegIdx != RISCV::NoSubRegister);
10038 InterSubVT = getLMUL1VT(VecVT);
10039 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10040 }
10041
10042 // Slide this vector register down by the desired number of elements in order
10043 // to place the desired subvector starting at element 0.
10044 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10045 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10046 if (SubVecVT.isFixedLengthVector())
10047 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10048 SDValue Slidedown =
10049 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10050 Vec, SlidedownAmt, Mask, VL);
10051
10052 // Now the vector is in the right position, extract our final subvector. This
10053 // should resolve to a COPY.
10054 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10055 DAG.getVectorIdxConstant(0, DL));
10056
10057 // We might have bitcast from a mask type: cast back to the original type if
10058 // required.
10059 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10060}
10061
10062// Widen a vector's operands to i8, then truncate its results back to the
10063// original type, typically i1. All operand and result types must be the same.
10064 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10065 SelectionDAG &DAG) {
10066 MVT VT = N.getSimpleValueType();
10067 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10068 SmallVector<SDValue, 4> WideOps;
10069 for (SDValue Op : N->ops()) {
10070 assert(Op.getSimpleValueType() == VT &&
10071 "Operands and result must be same type");
10072 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10073 }
10074
10075 unsigned NumVals = N->getNumValues();
10076
10077 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10078 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10079 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10080 SmallVector<SDValue, 4> TruncVals;
10081 for (unsigned I = 0; I < NumVals; I++) {
10082 TruncVals.push_back(
10083 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10084 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10085 }
10086
10087 if (TruncVals.size() > 1)
10088 return DAG.getMergeValues(TruncVals, DL);
10089 return TruncVals.front();
10090}
10091
10092SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10093 SelectionDAG &DAG) const {
10094 SDLoc DL(Op);
10095 MVT VecVT = Op.getSimpleValueType();
10096
10097 assert(VecVT.isScalableVector() &&
10098 "vector_interleave on non-scalable vector!");
10099
10100 // 1-bit element vectors need to be widened to e8
10101 if (VecVT.getVectorElementType() == MVT::i1)
10102 return widenVectorOpsToi8(Op, DL, DAG);
10103
10104 // If the VT is LMUL=8, we need to split and reassemble.
10105 if (VecVT.getSizeInBits().getKnownMinValue() ==
10106 (8 * RISCV::RVVBitsPerBlock)) {
10107 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10108 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10109 EVT SplitVT = Op0Lo.getValueType();
10110
10111 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10112 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10113 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10114 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10115
10116 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10117 ResLo.getValue(0), ResHi.getValue(0));
10118 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10119 ResHi.getValue(1));
10120 return DAG.getMergeValues({Even, Odd}, DL);
10121 }
10122
10123 // Concatenate the two vectors as one vector to deinterleave
10124 MVT ConcatVT =
10125 MVT::getVectorVT(VecVT.getVectorElementType(),
10126 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10127 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10128 Op.getOperand(0), Op.getOperand(1));
10129
10130 // We want to operate on all lanes, so get the mask and VL and mask for it
10131 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10132 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10133
10134 // We can deinterleave through vnsrl.wi if the element type is smaller than
10135 // ELEN
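// vnsrl.wi with a shift amount of 0 keeps the even (low) halves of each
// 2*SEW-wide element, and a shift of SEW keeps the odd (high) halves; that is
// the trick getDeinterleaveViaVNSRL relies on for the two results below.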
10136 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10137 SDValue Even =
10138 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10139 SDValue Odd =
10140 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10141 return DAG.getMergeValues({Even, Odd}, DL);
10142 }
10143
10144 // For the indices, use the same SEW to avoid an extra vsetvli
10145 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10146 // Create a vector of even indices {0, 2, 4, ...}
10147 SDValue EvenIdx =
10148 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10149 // Create a vector of odd indices {1, 3, 5, ... }
10150 SDValue OddIdx =
10151 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10152
10153 // Gather the even and odd elements into two separate vectors
10154 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10155 Concat, EvenIdx, Passthru, Mask, VL);
10156 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10157 Concat, OddIdx, Passthru, Mask, VL);
10158
10159 // Extract the result half of the gather for even and odd
10160 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10161 DAG.getVectorIdxConstant(0, DL));
10162 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10163 DAG.getVectorIdxConstant(0, DL));
10164
10165 return DAG.getMergeValues({Even, Odd}, DL);
10166}
10167
10168SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10169 SelectionDAG &DAG) const {
10170 SDLoc DL(Op);
10171 MVT VecVT = Op.getSimpleValueType();
10172
10173 assert(VecVT.isScalableVector() &&
10174 "vector_interleave on non-scalable vector!");
10175
10176 // i1 vectors need to be widened to i8
10177 if (VecVT.getVectorElementType() == MVT::i1)
10178 return widenVectorOpsToi8(Op, DL, DAG);
10179
10180 MVT XLenVT = Subtarget.getXLenVT();
10181 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10182
10183 // If the VT is LMUL=8, we need to split and reassemble.
10184 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10185 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10186 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10187 EVT SplitVT = Op0Lo.getValueType();
10188
10189 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10190 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10191 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10192 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10193
10194 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10195 ResLo.getValue(0), ResLo.getValue(1));
10196 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10197 ResHi.getValue(0), ResHi.getValue(1));
10198 return DAG.getMergeValues({Lo, Hi}, DL);
10199 }
10200
10201 SDValue Interleaved;
10202
10203 // If the element type is smaller than ELEN, then we can interleave with
10204 // vwaddu.vv and vwmaccu.vx
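// (vwaddu.vv x, y) computes x + y in 2*SEW-wide elements, and
// (vwmaccu.vx acc, -1, y) then adds (2^SEW - 1) * y, giving x + (2^SEW) * y,
// i.e. each pair (x[i], y[i]) packed into a single double-width element.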
10205 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10206 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10207 DAG, Subtarget);
10208 } else {
10209 // Otherwise, fall back to using vrgatherei16.vv
10210 MVT ConcatVT =
10211 MVT::getVectorVT(VecVT.getVectorElementType(),
10212 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10213 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10214 Op.getOperand(0), Op.getOperand(1));
10215
10216 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10217
10218 // 0 1 2 3 4 5 6 7 ...
10219 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10220
10221 // 1 1 1 1 1 1 1 1 ...
10222 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10223
10224 // 1 0 1 0 1 0 1 0 ...
10225 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10226 OddMask = DAG.getSetCC(
10227 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10228 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10229 ISD::CondCode::SETNE);
10230
10231 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10232
10233 // Build up the index vector for interleaving the concatenated vector
10234 // 0 0 1 1 2 2 3 3 ...
10235 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10236 // 0 n 1 n+1 2 n+2 3 n+3 ...
10237 Idx =
10238 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10239
10240 // Then perform the interleave
10241 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10242 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10243 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10244 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10245 }
10246
10247 // Extract the two halves from the interleaved result
10248 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10249 DAG.getVectorIdxConstant(0, DL));
10250 SDValue Hi = DAG.getNode(
10251 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10252 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10253
10254 return DAG.getMergeValues({Lo, Hi}, DL);
10255}
10256
10257// Lower step_vector to the vid instruction. Any non-identity step value must
10258 // be accounted for by manual expansion.
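// For example, (step_vector 8) becomes (shl (vid.v), 3), while a
// non-power-of-2 step such as 6 becomes (mul (vid.v), 6) via a splatted
// scalar.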
10259SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10260 SelectionDAG &DAG) const {
10261 SDLoc DL(Op);
10262 MVT VT = Op.getSimpleValueType();
10263 assert(VT.isScalableVector() && "Expected scalable vector");
10264 MVT XLenVT = Subtarget.getXLenVT();
10265 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10266 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10267 uint64_t StepValImm = Op.getConstantOperandVal(0);
10268 if (StepValImm != 1) {
10269 if (isPowerOf2_64(StepValImm)) {
10270 SDValue StepVal =
10271 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10272 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10273 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10274 } else {
10275 SDValue StepVal = lowerScalarSplat(
10276 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10277 VL, VT, DL, DAG, Subtarget);
10278 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10279 }
10280 }
10281 return StepVec;
10282}
10283
10284// Implement vector_reverse using vrgather.vv with indices determined by
10285// subtracting the id of each element from (VLMAX-1). This will convert
10286// the indices like so:
10287// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10288// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10289SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10290 SelectionDAG &DAG) const {
10291 SDLoc DL(Op);
10292 MVT VecVT = Op.getSimpleValueType();
10293 if (VecVT.getVectorElementType() == MVT::i1) {
10294 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10295 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10296 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10297 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10298 }
10299 unsigned EltSize = VecVT.getScalarSizeInBits();
10300 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10301 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10302 unsigned MaxVLMAX =
10303 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10304
10305 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10306 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10307
10308 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10309 // to use vrgatherei16.vv.
10310 // TODO: It's also possible to use vrgatherei16.vv for other types to
10311 // decrease register width for the index calculation.
10312 if (MaxVLMAX > 256 && EltSize == 8) {
10313 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10314 // Reverse each half, then reassemble them in reverse order.
10315 // NOTE: It's also possible that after splitting that VLMAX no longer
10316 // requires vrgatherei16.vv.
10317 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10318 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10319 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10320 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10321 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10322 // Reassemble the low and high pieces reversed.
10323 // FIXME: This is a CONCAT_VECTORS.
10324 SDValue Res =
10325 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10326 DAG.getVectorIdxConstant(0, DL));
10327 return DAG.getNode(
10328 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10329 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10330 }
10331
10332 // Just promote the int type to i16 which will double the LMUL.
10333 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10334 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10335 }
10336
10337 MVT XLenVT = Subtarget.getXLenVT();
10338 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10339
10340 // Calculate VLMAX-1 for the desired SEW.
10341 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10342 computeVLMax(VecVT, DL, DAG),
10343 DAG.getConstant(1, DL, XLenVT));
10344
10345 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10346 bool IsRV32E64 =
10347 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10348 SDValue SplatVL;
10349 if (!IsRV32E64)
10350 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10351 else
10352 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10353 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10354
10355 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10356 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10357 DAG.getUNDEF(IntVT), Mask, VL);
10358
10359 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10360 DAG.getUNDEF(VecVT), Mask, VL);
10361}
10362
10363SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10364 SelectionDAG &DAG) const {
10365 SDLoc DL(Op);
10366 SDValue V1 = Op.getOperand(0);
10367 SDValue V2 = Op.getOperand(1);
10368 MVT XLenVT = Subtarget.getXLenVT();
10369 MVT VecVT = Op.getSimpleValueType();
10370
10371 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10372
10373 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10374 SDValue DownOffset, UpOffset;
10375 if (ImmValue >= 0) {
10376 // The operand is a TargetConstant; we need to rebuild it as a regular
10377 // constant.
10378 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10379 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10380 } else {
10381 // The operand is a TargetConstant; we need to rebuild it as a regular
10382 // constant rather than negating the original operand.
10383 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10384 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10385 }
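// For example, a splice immediate of -2 gives UpOffset=2 and
// DownOffset=VLMAX-2: the slidedown below moves the last two elements of V1
// into positions [0,2), and the slideup then fills [2,VLMAX) from V2.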
10386
10387 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10388
10389 SDValue SlideDown =
10390 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10391 DownOffset, TrueMask, UpOffset);
10392 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10393 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10394 RISCVII::TAIL_AGNOSTIC);
10395}
10396
10397SDValue
10398RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10399 SelectionDAG &DAG) const {
10400 SDLoc DL(Op);
10401 auto *Load = cast<LoadSDNode>(Op);
10402
10403 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10404 Load->getMemoryVT(),
10405 *Load->getMemOperand()) &&
10406 "Expecting a correctly-aligned load");
10407
10408 MVT VT = Op.getSimpleValueType();
10409 MVT XLenVT = Subtarget.getXLenVT();
10410 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10411
10412 // If we know the exact VLEN and our fixed length vector completely fills
10413 // the container, use a whole register load instead.
10414 const auto [MinVLMAX, MaxVLMAX] =
10415 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10416 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10417 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10418 MachineMemOperand *MMO = Load->getMemOperand();
10419 SDValue NewLoad =
10420 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10421 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10422 MMO->getAAInfo(), MMO->getRanges());
10423 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10424 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10425 }
10426
10427 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10428
10429 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10430 SDValue IntID = DAG.getTargetConstant(
10431 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10432 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10433 if (!IsMaskOp)
10434 Ops.push_back(DAG.getUNDEF(ContainerVT));
10435 Ops.push_back(Load->getBasePtr());
10436 Ops.push_back(VL);
10437 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10438 SDValue NewLoad =
10439 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10440 Load->getMemoryVT(), Load->getMemOperand());
10441
10442 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10443 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10444}
10445
10446SDValue
10447RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10448 SelectionDAG &DAG) const {
10449 SDLoc DL(Op);
10450 auto *Store = cast<StoreSDNode>(Op);
10451
10452 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10453 Store->getMemoryVT(),
10454 *Store->getMemOperand()) &&
10455 "Expecting a correctly-aligned store");
10456
10457 SDValue StoreVal = Store->getValue();
10458 MVT VT = StoreVal.getSimpleValueType();
10459 MVT XLenVT = Subtarget.getXLenVT();
10460
10461 // If the size is less than a byte, we need to pad with zeros to make a byte.
10462 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10463 VT = MVT::v8i1;
10464 StoreVal =
10465 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10466 StoreVal, DAG.getVectorIdxConstant(0, DL));
10467 }
10468
10469 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10470
10471 SDValue NewValue =
10472 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10473
10474 // If we know the exact VLEN and our fixed length vector completely fills
10475 // the container, use a whole register store instead.
10476 const auto [MinVLMAX, MaxVLMAX] =
10477 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10478 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10479 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10480 MachineMemOperand *MMO = Store->getMemOperand();
10481 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10482 MMO->getPointerInfo(), MMO->getBaseAlign(),
10483 MMO->getFlags(), MMO->getAAInfo());
10484 }
10485
10486 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10487
10488 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10489 SDValue IntID = DAG.getTargetConstant(
10490 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10491 return DAG.getMemIntrinsicNode(
10492 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10493 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10494 Store->getMemoryVT(), Store->getMemOperand());
10495}
10496
10497SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10498 SelectionDAG &DAG) const {
10499 SDLoc DL(Op);
10500 MVT VT = Op.getSimpleValueType();
10501
10502 const auto *MemSD = cast<MemSDNode>(Op);
10503 EVT MemVT = MemSD->getMemoryVT();
10504 MachineMemOperand *MMO = MemSD->getMemOperand();
10505 SDValue Chain = MemSD->getChain();
10506 SDValue BasePtr = MemSD->getBasePtr();
10507
10508 SDValue Mask, PassThru, VL;
10509 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10510 Mask = VPLoad->getMask();
10511 PassThru = DAG.getUNDEF(VT);
10512 VL = VPLoad->getVectorLength();
10513 } else {
10514 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10515 Mask = MLoad->getMask();
10516 PassThru = MLoad->getPassThru();
10517 }
10518
10519 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10520
10521 MVT XLenVT = Subtarget.getXLenVT();
10522
10523 MVT ContainerVT = VT;
10524 if (VT.isFixedLengthVector()) {
10525 ContainerVT = getContainerForFixedLengthVector(VT);
10526 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10527 if (!IsUnmasked) {
10528 MVT MaskVT = getMaskTypeFor(ContainerVT);
10529 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10530 }
10531 }
10532
10533 if (!VL)
10534 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10535
10536 unsigned IntID =
10537 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10538 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10539 if (IsUnmasked)
10540 Ops.push_back(DAG.getUNDEF(ContainerVT));
10541 else
10542 Ops.push_back(PassThru);
10543 Ops.push_back(BasePtr);
10544 if (!IsUnmasked)
10545 Ops.push_back(Mask);
10546 Ops.push_back(VL);
10547 if (!IsUnmasked)
10548 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10549
10550 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10551
10552 SDValue Result =
10553 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10554 Chain = Result.getValue(1);
10555
10556 if (VT.isFixedLengthVector())
10557 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10558
10559 return DAG.getMergeValues({Result, Chain}, DL);
10560}
10561
10562SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10563 SelectionDAG &DAG) const {
10564 SDLoc DL(Op);
10565
10566 const auto *MemSD = cast<MemSDNode>(Op);
10567 EVT MemVT = MemSD->getMemoryVT();
10568 MachineMemOperand *MMO = MemSD->getMemOperand();
10569 SDValue Chain = MemSD->getChain();
10570 SDValue BasePtr = MemSD->getBasePtr();
10571 SDValue Val, Mask, VL;
10572
10573 bool IsCompressingStore = false;
10574 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10575 Val = VPStore->getValue();
10576 Mask = VPStore->getMask();
10577 VL = VPStore->getVectorLength();
10578 } else {
10579 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10580 Val = MStore->getValue();
10581 Mask = MStore->getMask();
10582 IsCompressingStore = MStore->isCompressingStore();
10583 }
10584
10585 bool IsUnmasked =
10586 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10587
10588 MVT VT = Val.getSimpleValueType();
10589 MVT XLenVT = Subtarget.getXLenVT();
10590
10591 MVT ContainerVT = VT;
10592 if (VT.isFixedLengthVector()) {
10593 ContainerVT = getContainerForFixedLengthVector(VT);
10594
10595 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10596 if (!IsUnmasked || IsCompressingStore) {
10597 MVT MaskVT = getMaskTypeFor(ContainerVT);
10598 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10599 }
10600 }
10601
10602 if (!VL)
10603 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10604
10605 if (IsCompressingStore) {
10606 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10607 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10608 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10609 VL =
10610 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10611 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10612 }
10613
10614 unsigned IntID =
10615 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10616 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10617 Ops.push_back(Val);
10618 Ops.push_back(BasePtr);
10619 if (!IsUnmasked)
10620 Ops.push_back(Mask);
10621 Ops.push_back(VL);
10622
10623 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10624 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10625}
10626
10627SDValue
10628RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10629 SelectionDAG &DAG) const {
10630 MVT InVT = Op.getOperand(0).getSimpleValueType();
10631 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10632
10633 MVT VT = Op.getSimpleValueType();
10634
10635 SDValue Op1 =
10636 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10637 SDValue Op2 =
10638 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10639
10640 SDLoc DL(Op);
10641 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10642 DAG, Subtarget);
10643 MVT MaskVT = getMaskTypeFor(ContainerVT);
10644
10645 SDValue Cmp =
10646 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10647 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10648
10649 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10650}
10651
10652SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10653 SelectionDAG &DAG) const {
10654 unsigned Opc = Op.getOpcode();
10655 SDLoc DL(Op);
10656 SDValue Chain = Op.getOperand(0);
10657 SDValue Op1 = Op.getOperand(1);
10658 SDValue Op2 = Op.getOperand(2);
10659 SDValue CC = Op.getOperand(3);
10660 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10661 MVT VT = Op.getSimpleValueType();
10662 MVT InVT = Op1.getSimpleValueType();
10663
10664 // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
10665 // condition code.
10666 if (Opc == ISD::STRICT_FSETCCS) {
10667 // Expand strict_fsetccs(x, y, oeq) to
10668 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10669 SDVTList VTList = Op->getVTList();
10670 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10671 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10672 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10673 Op2, OLECCVal);
10674 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10675 Op1, OLECCVal);
10676 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10677 Tmp1.getValue(1), Tmp2.getValue(1));
10678 // Tmp1 and Tmp2 might be the same node.
10679 if (Tmp1 != Tmp2)
10680 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10681 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10682 }
10683
10684 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10685 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10686 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10687 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10688 Op2, OEQCCVal);
10689 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10690 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10691 }
10692 }
10693
10694 MVT ContainerInVT = InVT;
10695 if (InVT.isFixedLengthVector()) {
10696 ContainerInVT = getContainerForFixedLengthVector(InVT);
10697 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10698 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10699 }
10700 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10701
10702 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10703
10704 SDValue Res;
10705 if (Opc == ISD::STRICT_FSETCC &&
10706 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10707 CCVal == ISD::SETOLE)) {
10708 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10709 // is only active when both input elements are ordered.
10710 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10711 SDValue OrderMask1 = DAG.getNode(
10712 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10713 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10714 True, VL});
10715 SDValue OrderMask2 = DAG.getNode(
10716 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10717 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10718 True, VL});
10719 Mask =
10720 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10721 // Use Mask as the passthru operand to let the result be 0 if either of the
10722 // inputs is unordered.
10723 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10724 DAG.getVTList(MaskVT, MVT::Other),
10725 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10726 } else {
10727 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10728 : RISCVISD::STRICT_FSETCCS_VL;
10729 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10730 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10731 }
10732
10733 if (VT.isFixedLengthVector()) {
10734 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10735 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10736 }
10737 return Res;
10738}
10739
10740// Lower vector ABS to smax(X, sub(0, X)).
10741SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10742 SDLoc DL(Op);
10743 MVT VT = Op.getSimpleValueType();
10744 SDValue X = Op.getOperand(0);
10745
10746 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10747 "Unexpected type for ISD::ABS");
10748
10749 MVT ContainerVT = VT;
10750 if (VT.isFixedLengthVector()) {
10751 ContainerVT = getContainerForFixedLengthVector(VT);
10752 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10753 }
10754
10755 SDValue Mask, VL;
10756 if (Op->getOpcode() == ISD::VP_ABS) {
10757 Mask = Op->getOperand(1);
10758 if (VT.isFixedLengthVector())
10759 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10760 Subtarget);
10761 VL = Op->getOperand(2);
10762 } else
10763 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10764
10765 SDValue SplatZero = DAG.getNode(
10766 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10767 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10768 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10769 DAG.getUNDEF(ContainerVT), Mask, VL);
10770 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10771 DAG.getUNDEF(ContainerVT), Mask, VL);
10772
10773 if (VT.isFixedLengthVector())
10774 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10775 return Max;
10776}
10777
10778SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10779 SDValue Op, SelectionDAG &DAG) const {
10780 SDLoc DL(Op);
10781 MVT VT = Op.getSimpleValueType();
10782 SDValue Mag = Op.getOperand(0);
10783 SDValue Sign = Op.getOperand(1);
10784 assert(Mag.getValueType() == Sign.getValueType() &&
10785 "Can only handle COPYSIGN with matching types.");
10786
10787 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10788 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10789 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10790
10791 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10792
10793 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10794 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10795
10796 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10797}
10798
10799SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10800 SDValue Op, SelectionDAG &DAG) const {
10801 MVT VT = Op.getSimpleValueType();
10802 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10803
10804 MVT I1ContainerVT =
10805 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10806
10807 SDValue CC =
10808 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10809 SDValue Op1 =
10810 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10811 SDValue Op2 =
10812 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10813
10814 SDLoc DL(Op);
10815 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10816
10817 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10818 Op2, DAG.getUNDEF(ContainerVT), VL);
10819
10820 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10821}
10822
10823SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10824 SelectionDAG &DAG) const {
10825 unsigned NewOpc = getRISCVVLOp(Op);
10826 bool HasPassthruOp = hasPassthruOp(NewOpc);
10827 bool HasMask = hasMaskOp(NewOpc);
10828
10829 MVT VT = Op.getSimpleValueType();
10830 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10831
10832 // Create list of operands by converting existing ones to scalable types.
10833 SmallVector<SDValue, 6> Ops;
10834 for (const SDValue &V : Op->op_values()) {
10835 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10836
10837 // Pass through non-vector operands.
10838 if (!V.getValueType().isVector()) {
10839 Ops.push_back(V);
10840 continue;
10841 }
10842
10843 // "cast" fixed length vector to a scalable vector.
10844 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10845 "Only fixed length vectors are supported!");
10846 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10847 }
10848
10849 SDLoc DL(Op);
10850 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10851 if (HasPassthruOp)
10852 Ops.push_back(DAG.getUNDEF(ContainerVT));
10853 if (HasMask)
10854 Ops.push_back(Mask);
10855 Ops.push_back(VL);
10856
10857 // StrictFP operations have two result values. Their lowered result should
10858 // have the same result count.
10859 if (Op->isStrictFPOpcode()) {
10860 SDValue ScalableRes =
10861 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10862 Op->getFlags());
10863 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10864 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10865 }
10866
10867 SDValue ScalableRes =
10868 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10869 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10870}
10871
10872// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10873// * Operands of each node are assumed to be in the same order.
10874// * The EVL operand is promoted from i32 to i64 on RV64.
10875// * Fixed-length vectors are converted to their scalable-vector container
10876// types.
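// For example, (vp_add x, y, mask, evl) becomes
// (RISCVISD::ADD_VL x, y, undef_passthru, mask, evl), with any fixed-length
// operands first converted to their scalable container types.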
10877SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10878 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10879 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
10880
10881 SDLoc DL(Op);
10882 MVT VT = Op.getSimpleValueType();
10883 SmallVector<SDValue, 4> Ops;
10884
10885 MVT ContainerVT = VT;
10886 if (VT.isFixedLengthVector())
10887 ContainerVT = getContainerForFixedLengthVector(VT);
10888
10889 for (const auto &OpIdx : enumerate(Op->ops())) {
10890 SDValue V = OpIdx.value();
10891 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10892 // Add dummy passthru value before the mask. Or if there isn't a mask,
10893 // before EVL.
10894 if (HasPassthruOp) {
10895 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10896 if (MaskIdx) {
10897 if (*MaskIdx == OpIdx.index())
10898 Ops.push_back(DAG.getUNDEF(ContainerVT));
10899 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10900 OpIdx.index()) {
10901 if (Op.getOpcode() == ISD::VP_MERGE) {
10902 // For VP_MERGE, copy the false operand instead of an undef value.
10903 Ops.push_back(Ops.back());
10904 } else {
10905 assert(Op.getOpcode() == ISD::VP_SELECT);
10906 // For VP_SELECT, add an undef value.
10907 Ops.push_back(DAG.getUNDEF(ContainerVT));
10908 }
10909 }
10910 }
10911 // Pass through operands which aren't fixed-length vectors.
10912 if (!V.getValueType().isFixedLengthVector()) {
10913 Ops.push_back(V);
10914 continue;
10915 }
10916 // "cast" fixed length vector to a scalable vector.
10917 MVT OpVT = V.getSimpleValueType();
10918 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10919 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10920 "Only fixed length vectors are supported!");
10921 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10922 }
10923
10924 if (!VT.isFixedLengthVector())
10925 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10926
10927 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10928
10929 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10930}
10931
10932SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10933 SelectionDAG &DAG) const {
10934 SDLoc DL(Op);
10935 MVT VT = Op.getSimpleValueType();
10936
10937 SDValue Src = Op.getOperand(0);
10938 // NOTE: Mask is dropped.
10939 SDValue VL = Op.getOperand(2);
10940
10941 MVT ContainerVT = VT;
10942 if (VT.isFixedLengthVector()) {
10943 ContainerVT = getContainerForFixedLengthVector(VT);
10944 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10945 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10946 }
10947
10948 MVT XLenVT = Subtarget.getXLenVT();
10949 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10950 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10951 DAG.getUNDEF(ContainerVT), Zero, VL);
10952
10953 SDValue SplatValue = DAG.getConstant(
10954 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10955 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10956 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10957
10958 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
10959 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
10960 if (!VT.isFixedLengthVector())
10961 return Result;
10962 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10963}
10964
10965SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10966 SelectionDAG &DAG) const {
10967 SDLoc DL(Op);
10968 MVT VT = Op.getSimpleValueType();
10969
10970 SDValue Op1 = Op.getOperand(0);
10971 SDValue Op2 = Op.getOperand(1);
10972 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10973 // NOTE: Mask is dropped.
10974 SDValue VL = Op.getOperand(4);
10975
10976 MVT ContainerVT = VT;
10977 if (VT.isFixedLengthVector()) {
10978 ContainerVT = getContainerForFixedLengthVector(VT);
10979 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10980 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10981 }
10982
10983 SDValue Result;
10984 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10985
10986 switch (Condition) {
10987 default:
10988 break;
10989 // X != Y --> (X^Y)
10990 case ISD::SETNE:
10991 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10992 break;
10993 // X == Y --> ~(X^Y)
10994 case ISD::SETEQ: {
10995 SDValue Temp =
10996 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10997 Result =
10998 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
10999 break;
11000 }
11001 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11002 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11003 case ISD::SETGT:
11004 case ISD::SETULT: {
11005 SDValue Temp =
11006 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11007 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11008 break;
11009 }
11010 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11011 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11012 case ISD::SETLT:
11013 case ISD::SETUGT: {
11014 SDValue Temp =
11015 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11016 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11017 break;
11018 }
11019 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11020 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11021 case ISD::SETGE:
11022 case ISD::SETULE: {
11023 SDValue Temp =
11024 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11025 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11026 break;
11027 }
11028 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11029 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11030 case ISD::SETLE:
11031 case ISD::SETUGE: {
11032 SDValue Temp =
11033 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11034 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11035 break;
11036 }
11037 }
11038
11039 if (!VT.isFixedLengthVector())
11040 return Result;
11041 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11042}
11043
11044// Lower Floating-Point/Integer Type-Convert VP SDNodes
11045SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11046 SelectionDAG &DAG) const {
11047 SDLoc DL(Op);
11048
11049 SDValue Src = Op.getOperand(0);
11050 SDValue Mask = Op.getOperand(1);
11051 SDValue VL = Op.getOperand(2);
11052 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11053
11054 MVT DstVT = Op.getSimpleValueType();
11055 MVT SrcVT = Src.getSimpleValueType();
11056 if (DstVT.isFixedLengthVector()) {
11057 DstVT = getContainerForFixedLengthVector(DstVT);
11058 SrcVT = getContainerForFixedLengthVector(SrcVT);
11059 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11060 MVT MaskVT = getMaskTypeFor(DstVT);
11061 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11062 }
11063
11064 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11065 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11066
11067 SDValue Result;
11068 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11069 if (SrcVT.isInteger()) {
11070 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11071
11072 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11073 ? RISCVISD::VSEXT_VL
11074 : RISCVISD::VZEXT_VL;
11075
11076 // Do we need to do any pre-widening before converting?
11077 if (SrcEltSize == 1) {
11078 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11079 MVT XLenVT = Subtarget.getXLenVT();
11080 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11081 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11082 DAG.getUNDEF(IntVT), Zero, VL);
11083 SDValue One = DAG.getConstant(
11084 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11085 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11086 DAG.getUNDEF(IntVT), One, VL);
11087 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11088 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11089 } else if (DstEltSize > (2 * SrcEltSize)) {
11090 // Widen before converting.
11091 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11092 DstVT.getVectorElementCount());
11093 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11094 }
11095
11096 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11097 } else {
11098 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11099 "Wrong input/output vector types");
11100
11101 // Convert f16 to f32 then convert f32 to i64.
11102 if (DstEltSize > (2 * SrcEltSize)) {
11103 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11104 MVT InterimFVT =
11105 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11106 Src =
11107 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11108 }
11109
11110 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11111 }
11112 } else { // Narrowing + Conversion
11113 if (SrcVT.isInteger()) {
11114 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11115 // First do a narrowing convert to an FP type half the size, then round
11116 // the FP type to a small FP type if needed.
11117
11118 MVT InterimFVT = DstVT;
11119 if (SrcEltSize > (2 * DstEltSize)) {
11120 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11121 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11122 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11123 }
11124
11125 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11126
11127 if (InterimFVT != DstVT) {
11128 Src = Result;
11129 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11130 }
11131 } else {
11132 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11133 "Wrong input/output vector types");
11134 // First do a narrowing conversion to an integer half the size, then
11135 // truncate if needed.
11136
11137 if (DstEltSize == 1) {
11138 // First convert to the same size integer, then convert to mask using
11139 // setcc.
11140 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11141 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11142 DstVT.getVectorElementCount());
11143 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11144
11145 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11146 // otherwise the conversion was undefined.
11147 MVT XLenVT = Subtarget.getXLenVT();
11148 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11149 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11150 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11151 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11152 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11153 DAG.getUNDEF(DstVT), Mask, VL});
11154 } else {
11155 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11156 DstVT.getVectorElementCount());
11157
11158 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11159
11160 while (InterimIVT != DstVT) {
11161 SrcEltSize /= 2;
11162 Src = Result;
11163 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11164 DstVT.getVectorElementCount());
11165 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11166 Src, Mask, VL);
11167 }
11168 }
11169 }
11170 }
11171
11172 MVT VT = Op.getSimpleValueType();
11173 if (!VT.isFixedLengthVector())
11174 return Result;
11175 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11176}
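// Illustrative sketch (standalone, not part of RISCVISelLowering.cpp): the
// SrcEltSize == 1 pre-widening above turns each mask bit into 0 or 1 (for
// unsigned conversions) or 0 or -1 (for signed conversions) before the
// ordinary int-to-FP conversion runs. A scalar model of that step:
#include <cassert>
double boolToFP(bool B, bool IsSigned) {
  long long V = B ? (IsSigned ? -1LL : 1LL) : 0LL; // VMERGE of splat(+/-1) and splat(0)
  return static_cast<double>(V);                   // then the usual [su]int_to_fp
}
int main() {
  assert(boolToFP(true, /*IsSigned=*/false) == 1.0);
  assert(boolToFP(true, /*IsSigned=*/true) == -1.0);
  assert(boolToFP(false, /*IsSigned=*/true) == 0.0);
  return 0;
}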
11177
11178SDValue
11179RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11180 SelectionDAG &DAG) const {
11181 SDLoc DL(Op);
11182
11183 SDValue Op1 = Op.getOperand(0);
11184 SDValue Op2 = Op.getOperand(1);
11185 SDValue Offset = Op.getOperand(2);
11186 SDValue Mask = Op.getOperand(3);
11187 SDValue EVL1 = Op.getOperand(4);
11188 SDValue EVL2 = Op.getOperand(5);
11189
11190 const MVT XLenVT = Subtarget.getXLenVT();
11191 MVT VT = Op.getSimpleValueType();
11192 MVT ContainerVT = VT;
11193 if (VT.isFixedLengthVector()) {
11194 ContainerVT = getContainerForFixedLengthVector(VT);
11195 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11196 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11197 MVT MaskVT = getMaskTypeFor(ContainerVT);
11198 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11199 }
11200
11201 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11202 if (IsMaskVector) {
11203 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11204
11205 // Expand input operands
11206 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11207 DAG.getUNDEF(ContainerVT),
11208 DAG.getConstant(1, DL, XLenVT), EVL1);
11209 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11210 DAG.getUNDEF(ContainerVT),
11211 DAG.getConstant(0, DL, XLenVT), EVL1);
11212 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11213 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11214
11215 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11216 DAG.getUNDEF(ContainerVT),
11217 DAG.getConstant(1, DL, XLenVT), EVL2);
11218 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11219 DAG.getUNDEF(ContainerVT),
11220 DAG.getConstant(0, DL, XLenVT), EVL2);
11221 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11222 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11223 }
11224
11225 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11226 SDValue DownOffset, UpOffset;
11227 if (ImmValue >= 0) {
11228 // The operand is a TargetConstant, so we need to rebuild it as a regular
11229 // constant.
11230 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11231 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11232 } else {
11233 // The operand is a TargetConstant, so we need to rebuild it as a regular
11234 // constant rather than negating the original operand.
11235 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11236 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11237 }
11238
11239 SDValue SlideDown =
11240 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11241 Op1, DownOffset, Mask, UpOffset);
11242 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11243 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11244
11245 if (IsMaskVector) {
11246 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11247 Result = DAG.getNode(
11248 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11249 {Result, DAG.getConstant(0, DL, ContainerVT),
11250 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11251 Mask, EVL2});
11252 }
11253
11254 if (!VT.isFixedLengthVector())
11255 return Result;
11256 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11257}
11258
11259SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
11260 SelectionDAG &DAG) const {
11261 SDLoc DL(Op);
11262 SDValue Val = Op.getOperand(0);
11263 SDValue Mask = Op.getOperand(1);
11264 SDValue VL = Op.getOperand(2);
11265 MVT VT = Op.getSimpleValueType();
11266
11267 MVT ContainerVT = VT;
11268 if (VT.isFixedLengthVector()) {
11269 ContainerVT = getContainerForFixedLengthVector(VT);
11270 MVT MaskVT = getMaskTypeFor(ContainerVT);
11271 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11272 }
11273
11274 SDValue Result =
11275 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
11276
11277 if (!VT.isFixedLengthVector())
11278 return Result;
11279 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11280}
11281
11282SDValue
11283RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11284 SelectionDAG &DAG) const {
11285 SDLoc DL(Op);
11286 MVT VT = Op.getSimpleValueType();
11287 MVT XLenVT = Subtarget.getXLenVT();
11288
11289 SDValue Op1 = Op.getOperand(0);
11290 SDValue Mask = Op.getOperand(1);
11291 SDValue EVL = Op.getOperand(2);
11292
11293 MVT ContainerVT = VT;
11294 if (VT.isFixedLengthVector()) {
11295 ContainerVT = getContainerForFixedLengthVector(VT);
11296 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11297 MVT MaskVT = getMaskTypeFor(ContainerVT);
11298 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11299 }
11300
11301 MVT GatherVT = ContainerVT;
11302 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11303 // Check if we are working with mask vectors
11304 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11305 if (IsMaskVector) {
11306 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11307
11308 // Expand input operand
11309 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11310 DAG.getUNDEF(IndicesVT),
11311 DAG.getConstant(1, DL, XLenVT), EVL);
11312 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11313 DAG.getUNDEF(IndicesVT),
11314 DAG.getConstant(0, DL, XLenVT), EVL);
11315 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11316 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11317 }
11318
11319 unsigned EltSize = GatherVT.getScalarSizeInBits();
11320 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11321 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11322 unsigned MaxVLMAX =
11323 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11324
11325 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11326 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11327 // to use vrgatherei16.vv.
11328 // TODO: It's also possible to use vrgatherei16.vv for other types to
11329 // decrease register width for the index calculation.
11330 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11331 if (MaxVLMAX > 256 && EltSize == 8) {
11332 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11333 // Split the vector in half and reverse each half using a full register
11334 // reverse.
11335 // Swap the halves and concatenate them.
11336 // Slide the concatenated result by (VLMax - VL).
11337 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11338 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11339 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11340
11341 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11342 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11343
11344 // Reassemble the low and high pieces reversed.
11345 // NOTE: this Result is unmasked (because we do not need masks for
11346 // shuffles). If in the future this has to change, we can use a SELECT_VL
11347 // between Result and UNDEF using the mask originally passed to VP_REVERSE.
11348 SDValue Result =
11349 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11350
11351 // Slide off any elements from past EVL that were reversed into the low
11352 // elements.
11353 unsigned MinElts = GatherVT.getVectorMinNumElements();
11354 SDValue VLMax =
11355 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11356 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11357
11358 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11359 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11360
11361 if (IsMaskVector) {
11362 // Truncate Result back to a mask vector
11363 Result =
11364 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11365 {Result, DAG.getConstant(0, DL, GatherVT),
11366 DAG.getCondCode(ISD::SETNE),
11367 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11368 }
11369
11370 if (!VT.isFixedLengthVector())
11371 return Result;
11372 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11373 }
11374
11375 // Just promote the int type to i16 which will double the LMUL.
11376 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11377 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11378 }
11379
11380 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11381 SDValue VecLen =
11382 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11383 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11384 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11385 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11386 DAG.getUNDEF(IndicesVT), Mask, EVL);
11387 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11388 DAG.getUNDEF(GatherVT), Mask, EVL);
11389
11390 if (IsMaskVector) {
11391 // Truncate Result back to a mask vector
11392 Result = DAG.getNode(
11393 RISCVISD::SETCC_VL, DL, ContainerVT,
11394 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11395 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11396 }
11397
11398 if (!VT.isFixedLengthVector())
11399 return Result;
11400 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11401}
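// Illustrative sketch (standalone, not part of RISCVISelLowering.cpp): a
// scalar reference model of the VID/VRSUB/VRGATHER sequence above. For active
// lanes below EVL the gathered index is (EVL - 1) - i; lanes at or beyond EVL
// (and masked-off lanes, here) are simply left alone.
#include <cstddef>
#include <vector>
std::vector<int> referenceVPReverse(const std::vector<int> &Src,
                                    const std::vector<bool> &Mask, size_t EVL) {
  std::vector<int> Result(Src.size(), 0);
  for (size_t I = 0; I < EVL && I < Src.size(); ++I)
    if (Mask[I])
      Result[I] = Src[EVL - 1 - I]; // same index the VID/VRSUB pair computes
  return Result;
}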
11402
11403SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11404 SelectionDAG &DAG) const {
11405 MVT VT = Op.getSimpleValueType();
11406 if (VT.getVectorElementType() != MVT::i1)
11407 return lowerVPOp(Op, DAG);
11408
11409 // It is safe to drop mask parameter as masked-off elements are undef.
11410 SDValue Op1 = Op->getOperand(0);
11411 SDValue Op2 = Op->getOperand(1);
11412 SDValue VL = Op->getOperand(3);
11413
11414 MVT ContainerVT = VT;
11415 const bool IsFixed = VT.isFixedLengthVector();
11416 if (IsFixed) {
11417 ContainerVT = getContainerForFixedLengthVector(VT);
11418 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11419 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11420 }
11421
11422 SDLoc DL(Op);
11423 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11424 if (!IsFixed)
11425 return Val;
11426 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11427}
11428
11429SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11430 SelectionDAG &DAG) const {
11431 SDLoc DL(Op);
11432 MVT XLenVT = Subtarget.getXLenVT();
11433 MVT VT = Op.getSimpleValueType();
11434 MVT ContainerVT = VT;
11435 if (VT.isFixedLengthVector())
11436 ContainerVT = getContainerForFixedLengthVector(VT);
11437
11438 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11439
11440 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11441 // Check if the mask is known to be all ones
11442 SDValue Mask = VPNode->getMask();
11443 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11444
11445 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11446 : Intrinsic::riscv_vlse_mask,
11447 DL, XLenVT);
11448 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11449 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11450 VPNode->getStride()};
11451 if (!IsUnmasked) {
11452 if (VT.isFixedLengthVector()) {
11453 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11454 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11455 }
11456 Ops.push_back(Mask);
11457 }
11458 Ops.push_back(VPNode->getVectorLength());
11459 if (!IsUnmasked) {
11460 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11461 Ops.push_back(Policy);
11462 }
11463
11464 SDValue Result =
11465 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11466 VPNode->getMemoryVT(), VPNode->getMemOperand());
11467 SDValue Chain = Result.getValue(1);
11468
11469 if (VT.isFixedLengthVector())
11470 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11471
11472 return DAG.getMergeValues({Result, Chain}, DL);
11473}
11474
11475SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11476 SelectionDAG &DAG) const {
11477 SDLoc DL(Op);
11478 MVT XLenVT = Subtarget.getXLenVT();
11479
11480 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11481 SDValue StoreVal = VPNode->getValue();
11482 MVT VT = StoreVal.getSimpleValueType();
11483 MVT ContainerVT = VT;
11484 if (VT.isFixedLengthVector()) {
11485 ContainerVT = getContainerForFixedLengthVector(VT);
11486 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11487 }
11488
11489 // Check if the mask is known to be all ones
11490 SDValue Mask = VPNode->getMask();
11491 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11492
11493 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11494 : Intrinsic::riscv_vsse_mask,
11495 DL, XLenVT);
11496 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11497 VPNode->getBasePtr(), VPNode->getStride()};
11498 if (!IsUnmasked) {
11499 if (VT.isFixedLengthVector()) {
11500 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11501 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11502 }
11503 Ops.push_back(Mask);
11504 }
11505 Ops.push_back(VPNode->getVectorLength());
11506
11507 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11508 Ops, VPNode->getMemoryVT(),
11509 VPNode->getMemOperand());
11510}
11511
11512// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11513// matched to a RVV indexed load. The RVV indexed load instructions only
11514// support the "unsigned unscaled" addressing mode; indices are implicitly
11515// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11516// signed or scaled indexing is extended to the XLEN value type and scaled
11517// accordingly.
11518SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11519 SelectionDAG &DAG) const {
11520 SDLoc DL(Op);
11521 MVT VT = Op.getSimpleValueType();
11522
11523 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11524 EVT MemVT = MemSD->getMemoryVT();
11525 MachineMemOperand *MMO = MemSD->getMemOperand();
11526 SDValue Chain = MemSD->getChain();
11527 SDValue BasePtr = MemSD->getBasePtr();
11528
11529 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11530 SDValue Index, Mask, PassThru, VL;
11531
11532 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11533 Index = VPGN->getIndex();
11534 Mask = VPGN->getMask();
11535 PassThru = DAG.getUNDEF(VT);
11536 VL = VPGN->getVectorLength();
11537 // VP doesn't support extending loads.
11538 LoadExtType = ISD::NON_EXTLOAD;
11539 } else {
11540 // Else it must be a MGATHER.
11541 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11542 Index = MGN->getIndex();
11543 Mask = MGN->getMask();
11544 PassThru = MGN->getPassThru();
11545 LoadExtType = MGN->getExtensionType();
11546 }
11547
11548 MVT IndexVT = Index.getSimpleValueType();
11549 MVT XLenVT = Subtarget.getXLenVT();
11550
11552 "Unexpected VTs!");
11553 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11554 // Targets have to explicitly opt-in for extending vector loads.
11555 assert(LoadExtType == ISD::NON_EXTLOAD &&
11556 "Unexpected extending MGATHER/VP_GATHER");
11557
11558 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11559 // the selection of the masked intrinsics doesn't do this for us.
11560 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11561
11562 MVT ContainerVT = VT;
11563 if (VT.isFixedLengthVector()) {
11564 ContainerVT = getContainerForFixedLengthVector(VT);
11565 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11566 ContainerVT.getVectorElementCount());
11567
11568 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11569
11570 if (!IsUnmasked) {
11571 MVT MaskVT = getMaskTypeFor(ContainerVT);
11572 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11573 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11574 }
11575 }
11576
11577 if (!VL)
11578 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11579
11580 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11581 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11582 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11583 }
11584
11585 unsigned IntID =
11586 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11587 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11588 if (IsUnmasked)
11589 Ops.push_back(DAG.getUNDEF(ContainerVT));
11590 else
11591 Ops.push_back(PassThru);
11592 Ops.push_back(BasePtr);
11593 Ops.push_back(Index);
11594 if (!IsUnmasked)
11595 Ops.push_back(Mask);
11596 Ops.push_back(VL);
11597 if (!IsUnmasked)
11598 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11599
11600 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11601 SDValue Result =
11602 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11603 Chain = Result.getValue(1);
11604
11605 if (VT.isFixedLengthVector())
11606 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11607
11608 return DAG.getMergeValues({Result, Chain}, DL);
11609}
11610
11611// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11612// matched to a RVV indexed store. The RVV indexed store instructions only
11613// support the "unsigned unscaled" addressing mode; indices are implicitly
11614// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11615// signed or scaled indexing is extended to the XLEN value type and scaled
11616// accordingly.
11617SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 SDLoc DL(Op);
11620 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11621 EVT MemVT = MemSD->getMemoryVT();
11622 MachineMemOperand *MMO = MemSD->getMemOperand();
11623 SDValue Chain = MemSD->getChain();
11624 SDValue BasePtr = MemSD->getBasePtr();
11625
11626 [[maybe_unused]] bool IsTruncatingStore = false;
11627 SDValue Index, Mask, Val, VL;
11628
11629 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11630 Index = VPSN->getIndex();
11631 Mask = VPSN->getMask();
11632 Val = VPSN->getValue();
11633 VL = VPSN->getVectorLength();
11634 // VP doesn't support truncating stores.
11635 IsTruncatingStore = false;
11636 } else {
11637 // Else it must be a MSCATTER.
11638 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11639 Index = MSN->getIndex();
11640 Mask = MSN->getMask();
11641 Val = MSN->getValue();
11642 IsTruncatingStore = MSN->isTruncatingStore();
11643 }
11644
11645 MVT VT = Val.getSimpleValueType();
11646 MVT IndexVT = Index.getSimpleValueType();
11647 MVT XLenVT = Subtarget.getXLenVT();
11648
11650 "Unexpected VTs!");
11651 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11652 // Targets have to explicitly opt-in for extending vector loads and
11653 // truncating vector stores.
11654 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11655
11656 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11657 // the selection of the masked intrinsics doesn't do this for us.
11658 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11659
11660 MVT ContainerVT = VT;
11661 if (VT.isFixedLengthVector()) {
11662 ContainerVT = getContainerForFixedLengthVector(VT);
11663 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11664 ContainerVT.getVectorElementCount());
11665
11666 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11667 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11668
11669 if (!IsUnmasked) {
11670 MVT MaskVT = getMaskTypeFor(ContainerVT);
11671 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11672 }
11673 }
11674
11675 if (!VL)
11676 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11677
11678 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11679 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11680 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11681 }
11682
11683 unsigned IntID =
11684 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11685 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11686 Ops.push_back(Val);
11687 Ops.push_back(BasePtr);
11688 Ops.push_back(Index);
11689 if (!IsUnmasked)
11690 Ops.push_back(Mask);
11691 Ops.push_back(VL);
11692
11693 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11694 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11695}
11696
11697SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11698 SelectionDAG &DAG) const {
11699 const MVT XLenVT = Subtarget.getXLenVT();
11700 SDLoc DL(Op);
11701 SDValue Chain = Op->getOperand(0);
11702 SDValue SysRegNo = DAG.getTargetConstant(
11703 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11704 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11705 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11706
11707 // Encoding used for rounding mode in RISC-V differs from that used in
11708 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
11709 // a table, which consists of a sequence of 4-bit fields, each representing the
11710 // corresponding FLT_ROUNDS mode.
11711 static const int Table =
11712 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11713 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11714 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11715 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11716 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11717
11718 SDValue Shift =
11719 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11720 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11721 DAG.getConstant(Table, DL, XLenVT), Shift);
11722 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11723 DAG.getConstant(7, DL, XLenVT));
11724
11725 return DAG.getMergeValues({Masked, Chain}, DL);
11726}
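// Illustrative sketch (standalone, not part of RISCVISelLowering.cpp): the
// table above packs one 4-bit FLT_ROUNDS value per RISC-V frm encoding, so the
// lookup is a shift by 4*frm followed by a mask. The encodings assumed below
// are frm: RNE=0, RTZ=1, RDN=2, RUP=3, RMM=4 and FLT_ROUNDS: 0=toward zero,
// 1=nearest, 2=upward, 3=downward, 4=nearest-away.
#include <cassert>
int frmToFltRounds(unsigned Frm) {
  static const int Table = (1 << 4 * 0) | // RNE -> nearest (1)
                           (0 << 4 * 1) | // RTZ -> toward zero (0)
                           (3 << 4 * 2) | // RDN -> downward (3)
                           (2 << 4 * 3) | // RUP -> upward (2)
                           (4 << 4 * 4);  // RMM -> nearest-away (4)
  return (Table >> (4 * Frm)) & 7;        // same SHL/SRL/AND sequence as above
}
int main() {
  assert(frmToFltRounds(0) == 1);
  assert(frmToFltRounds(2) == 3);
  return 0;
}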
11727
11728SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11729 SelectionDAG &DAG) const {
11730 const MVT XLenVT = Subtarget.getXLenVT();
11731 SDLoc DL(Op);
11732 SDValue Chain = Op->getOperand(0);
11733 SDValue RMValue = Op->getOperand(1);
11734 SDValue SysRegNo = DAG.getTargetConstant(
11735 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11736
11737 // Encoding used for rounding mode in RISC-V differs from that used in
11738 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
11739 // a table, which consists of a sequence of 4-bit fields, each representing the
11740 // corresponding RISC-V mode.
11741 static const unsigned Table =
11742 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11743 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11744 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11745 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11746 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11747
11748 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11749
11750 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11751 DAG.getConstant(2, DL, XLenVT));
11752 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11753 DAG.getConstant(Table, DL, XLenVT), Shift);
11754 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11755 DAG.getConstant(0x7, DL, XLenVT));
11756 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11757 RMValue);
11758}
11759
11760SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11761 SelectionDAG &DAG) const {
11762 MachineFunction &MF = DAG.getMachineFunction();
11763
11764 bool isRISCV64 = Subtarget.is64Bit();
11765 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11766
11767 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11768 return DAG.getFrameIndex(FI, PtrVT);
11769}
11770
11771// Returns the opcode of the target-specific SDNode that implements the 32-bit
11772// form of the given Opcode.
11773static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11774 switch (Opcode) {
11775 default:
11776 llvm_unreachable("Unexpected opcode");
11777 case ISD::SHL:
11778 return RISCVISD::SLLW;
11779 case ISD::SRA:
11780 return RISCVISD::SRAW;
11781 case ISD::SRL:
11782 return RISCVISD::SRLW;
11783 case ISD::SDIV:
11784 return RISCVISD::DIVW;
11785 case ISD::UDIV:
11786 return RISCVISD::DIVUW;
11787 case ISD::UREM:
11788 return RISCVISD::REMUW;
11789 case ISD::ROTL:
11790 return RISCVISD::ROLW;
11791 case ISD::ROTR:
11792 return RISCVISD::RORW;
11793 }
11794}
11795
11796// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11797 // node. Because i8/i16/i32 aren't legal types for RV64, these operations would
11798 // otherwise be promoted to i64, making it difficult to select the
11799 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally
11800 // of type i8/i16/i32 is lost.
11801 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11802 unsigned ExtOpc = ISD::ANY_EXTEND) {
11803 SDLoc DL(N);
11804 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11805 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11806 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11807 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11808 // ReplaceNodeResults requires we maintain the same type for the return value.
11809 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11810}
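// Illustrative sketch (standalone, not part of RISCVISelLowering.cpp):
// ANY_EXTEND is sufficient above because the RV64 *W instructions read only
// the low 32 bits of each source and sign-extend their 32-bit result, so
// garbage in the upper bits of the extended operands cannot change the answer.
// A reference model for SLLW (assuming two's-complement narrowing):
#include <cassert>
#include <cstdint>
int64_t sllwModel(int64_t Rs1, int64_t Rs2) {
  uint32_t Lo = static_cast<uint32_t>(Rs1);      // only the low 32 bits matter
  unsigned Sh = static_cast<uint32_t>(Rs2) & 31; // shift amount taken mod 32
  return static_cast<int32_t>(Lo << Sh);         // 32-bit result, sign-extended
}
int main() {
  int32_t A = -5, B = 3;
  int64_t SExt = A;                                         // sign-extended operand
  int64_t AnyExt = (static_cast<int64_t>(0x1234) << 32) |
                   static_cast<uint32_t>(A);                // junk in the upper bits
  assert(sllwModel(SExt, B) == sllwModel(AnyExt, B));       // same result either way
  return 0;
}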
11811
11812 // Converts the given 32-bit operation to an i64 operation with sign-extension
11813 // semantics to reduce the number of sign-extension instructions.
11814 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11815 SDLoc DL(N);
11816 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11817 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11818 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11819 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11820 DAG.getValueType(MVT::i32));
11821 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11822}
11823
11824 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11825 SmallVectorImpl<SDValue> &Results,
11826 SelectionDAG &DAG) const {
11827 SDLoc DL(N);
11828 switch (N->getOpcode()) {
11829 default:
11830 llvm_unreachable("Don't know how to custom type legalize this operation!");
11831 case ISD::STRICT_FP_TO_SINT:
11832 case ISD::STRICT_FP_TO_UINT:
11833 case ISD::FP_TO_SINT:
11834 case ISD::FP_TO_UINT: {
11835 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11836 "Unexpected custom legalisation");
11837 bool IsStrict = N->isStrictFPOpcode();
11838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11839 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11840 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11841 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11842 TargetLowering::TypeSoftenFloat) {
11843 if (!isTypeLegal(Op0.getValueType()))
11844 return;
11845 if (IsStrict) {
11846 SDValue Chain = N->getOperand(0);
11847 // In absence of Zfh, promote f16 to f32, then convert.
11848 if (Op0.getValueType() == MVT::f16 &&
11849 !Subtarget.hasStdExtZfhOrZhinx()) {
11850 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11851 {Chain, Op0});
11852 Chain = Op0.getValue(1);
11853 }
11854 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11855 : RISCVISD::STRICT_FCVT_WU_RV64;
11856 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11857 SDValue Res = DAG.getNode(
11858 Opc, DL, VTs, Chain, Op0,
11859 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11860 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11861 Results.push_back(Res.getValue(1));
11862 return;
11863 }
11864 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
11865 // convert.
11866 if ((Op0.getValueType() == MVT::f16 &&
11867 !Subtarget.hasStdExtZfhOrZhinx()) ||
11868 Op0.getValueType() == MVT::bf16)
11869 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11870
11871 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11872 SDValue Res =
11873 DAG.getNode(Opc, DL, MVT::i64, Op0,
11874 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11875 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11876 return;
11877 }
11878 // If the FP type needs to be softened, emit a library call using the 'si'
11879 // version. If we left it to default legalization we'd end up with 'di'. If
11880 // the FP type doesn't need to be softened just let generic type
11881 // legalization promote the result type.
11882 RTLIB::Libcall LC;
11883 if (IsSigned)
11884 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11885 else
11886 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11887 MakeLibCallOptions CallOptions;
11888 EVT OpVT = Op0.getValueType();
11889 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11890 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11891 SDValue Result;
11892 std::tie(Result, Chain) =
11893 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11894 Results.push_back(Result);
11895 if (IsStrict)
11896 Results.push_back(Chain);
11897 break;
11898 }
11899 case ISD::LROUND: {
11900 SDValue Op0 = N->getOperand(0);
11901 EVT Op0VT = Op0.getValueType();
11902 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11903 TargetLowering::TypeSoftenFloat) {
11904 if (!isTypeLegal(Op0VT))
11905 return;
11906
11907 // In absence of Zfh, promote f16 to f32, then convert.
11908 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11909 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11910
11911 SDValue Res =
11912 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11913 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11914 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11915 return;
11916 }
11917 // If the FP type needs to be softened, emit a library call to lround. We'll
11918 // need to truncate the result. We assume any value that doesn't fit in i32
11919 // is allowed to return an unspecified value.
11920 RTLIB::Libcall LC =
11921 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11922 MakeLibCallOptions CallOptions;
11923 EVT OpVT = Op0.getValueType();
11924 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11925 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11926 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11927 Results.push_back(Result);
11928 break;
11929 }
11930 case ISD::READCYCLECOUNTER:
11931 case ISD::READSTEADYCOUNTER: {
11932 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11933 "has custom type legalization on riscv32");
11934
11935 SDValue LoCounter, HiCounter;
11936 MVT XLenVT = Subtarget.getXLenVT();
11937 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11938 LoCounter = DAG.getTargetConstant(
11939 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11940 HiCounter = DAG.getTargetConstant(
11941 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11942 } else {
11943 LoCounter = DAG.getTargetConstant(
11944 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11945 HiCounter = DAG.getTargetConstant(
11946 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11947 }
11948 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11949 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
11950 N->getOperand(0), LoCounter, HiCounter);
11951
11952 Results.push_back(
11953 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11954 Results.push_back(RCW.getValue(2));
11955 break;
11956 }
11957 case ISD::LOAD: {
11958 if (!ISD::isNON_EXTLoad(N))
11959 return;
11960
11961 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11962 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11963 LoadSDNode *Ld = cast<LoadSDNode>(N);
11964
11965 SDLoc dl(N);
11966 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11967 Ld->getBasePtr(), Ld->getMemoryVT(),
11968 Ld->getMemOperand());
11969 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11970 Results.push_back(Res.getValue(1));
11971 return;
11972 }
11973 case ISD::MUL: {
11974 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
11975 unsigned XLen = Subtarget.getXLen();
11976 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
11977 if (Size > XLen) {
11978 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11979 SDValue LHS = N->getOperand(0);
11980 SDValue RHS = N->getOperand(1);
11981 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
11982
11983 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
11984 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
11985 // We need exactly one side to be unsigned.
11986 if (LHSIsU == RHSIsU)
11987 return;
11988
11989 auto MakeMULPair = [&](SDValue S, SDValue U) {
11990 MVT XLenVT = Subtarget.getXLenVT();
11991 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
11992 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
11993 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
11994 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
11995 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
11996 };
11997
11998 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
11999 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12000
12001 // The other operand should be signed, but still prefer MULH when
12002 // possible.
12003 if (RHSIsU && LHSIsS && !RHSIsS)
12004 Results.push_back(MakeMULPair(LHS, RHS));
12005 else if (LHSIsU && RHSIsS && !LHSIsS)
12006 Results.push_back(MakeMULPair(RHS, LHS));
12007
12008 return;
12009 }
12010 [[fallthrough]];
12011 }
12012 case ISD::ADD:
12013 case ISD::SUB:
12014 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12015 "Unexpected custom legalisation");
12016 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12017 break;
12018 case ISD::SHL:
12019 case ISD::SRA:
12020 case ISD::SRL:
12021 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12022 "Unexpected custom legalisation");
12023 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12024 // If we can use a BSET instruction, allow default promotion to apply.
12025 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12026 isOneConstant(N->getOperand(0)))
12027 break;
12028 Results.push_back(customLegalizeToWOp(N, DAG));
12029 break;
12030 }
12031
12032 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12033 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12034 // shift amount.
12035 if (N->getOpcode() == ISD::SHL) {
12036 SDLoc DL(N);
12037 SDValue NewOp0 =
12038 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12039 SDValue NewOp1 =
12040 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12041 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12042 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12043 DAG.getValueType(MVT::i32));
12044 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12045 }
12046
12047 break;
12048 case ISD::ROTL:
12049 case ISD::ROTR:
12050 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12051 "Unexpected custom legalisation");
12052 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12053 Subtarget.hasVendorXTHeadBb()) &&
12054 "Unexpected custom legalization");
12055 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12056 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12057 return;
12058 Results.push_back(customLegalizeToWOp(N, DAG));
12059 break;
12060 case ISD::CTTZ:
12061 case ISD::CTTZ_ZERO_UNDEF:
12062 case ISD::CTLZ:
12063 case ISD::CTLZ_ZERO_UNDEF: {
12064 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12065 "Unexpected custom legalisation");
12066
12067 SDValue NewOp0 =
12068 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12069 bool IsCTZ =
12070 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12071 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12072 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12073 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12074 return;
12075 }
12076 case ISD::SDIV:
12077 case ISD::UDIV:
12078 case ISD::UREM: {
12079 MVT VT = N->getSimpleValueType(0);
12080 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12081 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12082 "Unexpected custom legalisation");
12083 // Don't promote division/remainder by constant since we should expand those
12084 // to multiply by magic constant.
12085 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12086 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12087 !isIntDivCheap(N->getValueType(0), Attr))
12088 return;
12089
12090 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12091 // the upper 32 bits. For other types we need to sign or zero extend
12092 // based on the opcode.
12093 unsigned ExtOpc = ISD::ANY_EXTEND;
12094 if (VT != MVT::i32)
12095 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12096 : ISD::ZERO_EXTEND;
12097
12098 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12099 break;
12100 }
12101 case ISD::SADDO: {
12102 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12103 "Unexpected custom legalisation");
12104
12105 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12106 // use the default legalization.
12107 if (!isa<ConstantSDNode>(N->getOperand(1)))
12108 return;
12109
12110 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12111 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12112 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12113 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12114 DAG.getValueType(MVT::i32));
12115
12116 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12117
12118 // For an addition, the result should be less than one of the operands (LHS)
12119 // if and only if the other operand (RHS) is negative, otherwise there will
12120 // be overflow.
12121 // For a subtraction, the result should be less than one of the operands
12122 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12123 // otherwise there will be overflow.
12124 EVT OType = N->getValueType(1);
12125 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12126 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12127
12128 SDValue Overflow =
12129 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12130 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12131 Results.push_back(Overflow);
12132 return;
12133 }
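// Illustrative sketch (standalone, not part of RISCVISelLowering.cpp): the
// SADDO overflow rule used above, checked on plain i32 values. After a
// wrapping add, signed overflow occurred iff "result < LHS" and "RHS < 0"
// disagree.
#include <cassert>
#include <cstdint>
bool saddOverflowModel(int32_t LHS, int32_t RHS) {
  int32_t Res = static_cast<int32_t>(static_cast<uint32_t>(LHS) +
                                     static_cast<uint32_t>(RHS)); // wrapping add
  return (Res < LHS) != (RHS < 0);
}
int main() {
  assert(saddOverflowModel(INT32_MAX, 1));  // positive overflow
  assert(saddOverflowModel(INT32_MIN, -1)); // negative overflow
  assert(!saddOverflowModel(100, -50));     // no overflow
  return 0;
}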
12134 case ISD::UADDO:
12135 case ISD::USUBO: {
12136 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12137 "Unexpected custom legalisation");
12138 bool IsAdd = N->getOpcode() == ISD::UADDO;
12139 // Create an ADDW or SUBW.
12140 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12141 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12142 SDValue Res =
12143 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12144 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12145 DAG.getValueType(MVT::i32));
12146
12147 SDValue Overflow;
12148 if (IsAdd && isOneConstant(RHS)) {
12149 // Special case uaddo X, 1 overflowed if the addition result is 0.
12150 // The general case (X + C) < C is not necessarily beneficial. Although we
12151 // reduce the live range of X, we may introduce the materialization of
12152 // constant C, especially when the setcc result is used by a branch, since
12153 // RISC-V has no compare-with-constant-and-branch instructions.
12154 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12155 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12156 } else if (IsAdd && isAllOnesConstant(RHS)) {
12157 // Special case uaddo X, -1 overflowed if X != 0.
12158 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12159 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12160 } else {
12161 // Sign extend the LHS and perform an unsigned compare with the ADDW
12162 // result. Since the inputs are sign extended from i32, this is equivalent
12163 // to comparing the lower 32 bits.
12164 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12165 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12166 IsAdd ? ISD::SETULT : ISD::SETUGT);
12167 }
12168
12169 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12170 Results.push_back(Overflow);
12171 return;
12172 }
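// Illustrative sketch (standalone, not part of RISCVISelLowering.cpp): the
// unsigned-overflow checks used above. For uaddo(X, 1), overflow is simply
// "result == 0"; in the general case it is "result <u LHS".
#include <cassert>
#include <cstdint>
int main() {
  uint32_t X = 0xffffffffu;
  assert(X + 1u == 0u);             // uaddo X, 1: overflow iff the sum wraps to 0
  uint32_t A = 0x80000000u, B = 0x90000000u;
  uint32_t Sum = A + B;             // wraps around
  assert(Sum < A);                  // general rule: overflow iff Sum <u LHS
  uint32_t C = 5u, D = 7u;
  assert(!(C + D < C));             // no overflow here
  return 0;
}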
12173 case ISD::UADDSAT:
12174 case ISD::USUBSAT: {
12175 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12176 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
12177 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12178 // promotion for UADDO/USUBO.
12179 Results.push_back(expandAddSubSat(N, DAG));
12180 return;
12181 }
12182 case ISD::SADDSAT:
12183 case ISD::SSUBSAT: {
12184 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12185 "Unexpected custom legalisation");
12186 Results.push_back(expandAddSubSat(N, DAG));
12187 return;
12188 }
12189 case ISD::ABS: {
12190 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12191 "Unexpected custom legalisation");
12192
12193 if (Subtarget.hasStdExtZbb()) {
12194 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12195 // This allows us to remember that the result is sign extended. Expanding
12196 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12197 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12198 N->getOperand(0));
12199 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12200 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12201 return;
12202 }
12203
12204 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12205 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12206
12207 // Freeze the source so we can increase its use count.
12208 Src = DAG.getFreeze(Src);
12209
12210 // Copy sign bit to all bits using the sraiw pattern.
12211 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12212 DAG.getValueType(MVT::i32));
12213 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12214 DAG.getConstant(31, DL, MVT::i64));
12215
12216 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12217 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12218
12219 // NOTE: The result is only required to be anyextended, but sext is
12220 // consistent with type legalization of sub.
12221 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12222 DAG.getValueType(MVT::i32));
12223 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12224 return;
12225 }
12226 case ISD::BITCAST: {
12227 EVT VT = N->getValueType(0);
12228 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12229 SDValue Op0 = N->getOperand(0);
12230 EVT Op0VT = Op0.getValueType();
12231 MVT XLenVT = Subtarget.getXLenVT();
12232 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12233 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12234 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12235 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12236 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12237 Subtarget.hasStdExtZfbfmin()) {
12238 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12239 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12240 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12241 Subtarget.hasStdExtFOrZfinx()) {
12242 SDValue FPConv =
12243 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12244 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12245 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12246 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12247 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12248 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12249 NewReg.getValue(0), NewReg.getValue(1));
12250 Results.push_back(RetReg);
12251 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12252 isTypeLegal(Op0VT)) {
12253 // Custom-legalize bitcasts from fixed-length vector types to illegal
12254 // scalar types in order to improve codegen. Bitcast the vector to a
12255 // one-element vector type whose element type is the same as the result
12256 // type, and extract the first element.
12257 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12258 if (isTypeLegal(BVT)) {
12259 SDValue BVec = DAG.getBitcast(BVT, Op0);
12260 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12261 DAG.getVectorIdxConstant(0, DL)));
12262 }
12263 }
12264 break;
12265 }
12266 case RISCVISD::BREV8:
12267 case RISCVISD::ORC_B: {
12268 MVT VT = N->getSimpleValueType(0);
12269 MVT XLenVT = Subtarget.getXLenVT();
12270 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12271 "Unexpected custom legalisation");
12272 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
12273 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
12274 "Unexpected extension");
12275 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12276 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12277 // ReplaceNodeResults requires we maintain the same type for the return
12278 // value.
12279 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12280 break;
12281 }
12282 case ISD::EXTRACT_VECTOR_ELT: {
12283 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12284 // type is illegal (currently only vXi64 RV32).
12285 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12286 // transferred to the destination register. We issue two of these from the
12287 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12288 // first element.
12289 SDValue Vec = N->getOperand(0);
12290 SDValue Idx = N->getOperand(1);
12291
12292 // The vector type hasn't been legalized yet so we can't issue target
12293 // specific nodes if it needs legalization.
12294 // FIXME: We would manually legalize if it's important.
12295 if (!isTypeLegal(Vec.getValueType()))
12296 return;
12297
12298 MVT VecVT = Vec.getSimpleValueType();
12299
12300 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12301 VecVT.getVectorElementType() == MVT::i64 &&
12302 "Unexpected EXTRACT_VECTOR_ELT legalization");
12303
12304 // If this is a fixed vector, we need to convert it to a scalable vector.
12305 MVT ContainerVT = VecVT;
12306 if (VecVT.isFixedLengthVector()) {
12307 ContainerVT = getContainerForFixedLengthVector(VecVT);
12308 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12309 }
12310
12311 MVT XLenVT = Subtarget.getXLenVT();
12312
12313 // Use a VL of 1 to avoid processing more elements than we need.
12314 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12315
12316 // Unless the index is known to be 0, we must slide the vector down to get
12317 // the desired element into index 0.
12318 if (!isNullConstant(Idx)) {
12319 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12320 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12321 }
12322
12323 // Extract the lower XLEN bits of the correct vector element.
12324 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12325
12326 // To extract the upper XLEN bits of the vector element, shift the first
12327 // element right by 32 bits and re-extract the lower XLEN bits.
12328 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12329 DAG.getUNDEF(ContainerVT),
12330 DAG.getConstant(32, DL, XLenVT), VL);
12331 SDValue LShr32 =
12332 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12333 DAG.getUNDEF(ContainerVT), Mask, VL);
12334
12335 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12336
12337 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12338 break;
12339 }
12340 case ISD::INTRINSIC_WO_CHAIN: {
12341 unsigned IntNo = N->getConstantOperandVal(0);
12342 switch (IntNo) {
12343 default:
12345 "Don't know how to custom type legalize this intrinsic!");
12346 case Intrinsic::experimental_get_vector_length: {
12347 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12348 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12349 return;
12350 }
12351 case Intrinsic::experimental_cttz_elts: {
12352 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12353 Results.push_back(
12354 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12355 return;
12356 }
12357 case Intrinsic::riscv_orc_b:
12358 case Intrinsic::riscv_brev8:
12359 case Intrinsic::riscv_sha256sig0:
12360 case Intrinsic::riscv_sha256sig1:
12361 case Intrinsic::riscv_sha256sum0:
12362 case Intrinsic::riscv_sha256sum1:
12363 case Intrinsic::riscv_sm3p0:
12364 case Intrinsic::riscv_sm3p1: {
12365 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12366 return;
12367 unsigned Opc;
12368 switch (IntNo) {
12369 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12370 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12371 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12372 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12373 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12374 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12375 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12376 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12377 }
12378
12379 SDValue NewOp =
12380 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12381 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12382 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12383 return;
12384 }
12385 case Intrinsic::riscv_sm4ks:
12386 case Intrinsic::riscv_sm4ed: {
12387 unsigned Opc =
12388 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12389 SDValue NewOp0 =
12390 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12391 SDValue NewOp1 =
12392 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12393 SDValue Res =
12394 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12395 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12396 return;
12397 }
12398 case Intrinsic::riscv_mopr: {
12399 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12400 return;
12401 SDValue NewOp =
12402 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12403 SDValue Res = DAG.getNode(
12404 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12405 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12406 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12407 return;
12408 }
12409 case Intrinsic::riscv_moprr: {
12410 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12411 return;
12412 SDValue NewOp0 =
12413 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12414 SDValue NewOp1 =
12415 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12416 SDValue Res = DAG.getNode(
12417 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12418 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12419 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12420 return;
12421 }
12422 case Intrinsic::riscv_clmul: {
12423 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12424 return;
12425
12426 SDValue NewOp0 =
12427 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12428 SDValue NewOp1 =
12429 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12430 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12431 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12432 return;
12433 }
12434 case Intrinsic::riscv_clmulh:
12435 case Intrinsic::riscv_clmulr: {
12436 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12437 return;
12438
12439 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12440 // to the full 128-bit clmul result of multiplying two xlen values.
12441 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12442 // upper 32 bits.
12443 //
12444 // The alternative is to mask the inputs to 32 bits and use clmul, but
12445 // that requires two shifts to mask each input without zext.w.
12446 // FIXME: If the inputs are known zero extended or could be freely
12447 // zero extended, the mask form would be better.
12448 SDValue NewOp0 =
12449 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12450 SDValue NewOp1 =
12451 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12452 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12453 DAG.getConstant(32, DL, MVT::i64));
12454 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12455 DAG.getConstant(32, DL, MVT::i64));
12456 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12457 : RISCVISD::CLMULR;
12458 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12459 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12460 DAG.getConstant(32, DL, MVT::i64));
12461 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12462 return;
12463 }
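// Illustrative sketch (standalone, not part of RISCVISelLowering.cpp): why
// shifting both 32-bit inputs left by 32 and doing a 64-bit clmulh yields the
// 32-bit clmulh. clmul(x<<32, y<<32) equals clmul(x, y) shifted left by 64, so
// its upper 64 bits are exactly the full 64-bit carry-less product of x and y,
// whose upper 32 bits are the desired result.
#include <cassert>
#include <cstdint>
static uint64_t clmulLo64(uint64_t A, uint64_t B) { // low 64 bits of the product
  uint64_t R = 0;
  for (int I = 0; I < 64; ++I)
    if ((B >> I) & 1)
      R ^= A << I;
  return R;
}
static uint64_t clmulHi64(uint64_t A, uint64_t B) { // high 64 bits of the product
  uint64_t R = 0;
  for (int I = 1; I < 64; ++I)
    if ((B >> I) & 1)
      R ^= A >> (64 - I);
  return R;
}
int main() {
  uint32_t X = 0xdeadbeefu, Y = 0x12345678u;
  uint32_t Expected = static_cast<uint32_t>(clmulLo64(X, Y) >> 32); // 32-bit clmulh
  uint64_t Wide = clmulHi64(static_cast<uint64_t>(X) << 32,
                            static_cast<uint64_t>(Y) << 32);
  assert(static_cast<uint32_t>(Wide >> 32) == Expected);
  return 0;
}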
12464 case Intrinsic::riscv_vmv_x_s: {
12465 EVT VT = N->getValueType(0);
12466 MVT XLenVT = Subtarget.getXLenVT();
12467 if (VT.bitsLT(XLenVT)) {
12468 // Simple case just extract using vmv.x.s and truncate.
12469 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12470 Subtarget.getXLenVT(), N->getOperand(1));
12471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12472 return;
12473 }
12474
12475 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12476 "Unexpected custom legalization");
12477
12478 // We need to do the move in two steps.
12479 SDValue Vec = N->getOperand(1);
12480 MVT VecVT = Vec.getSimpleValueType();
12481
12482 // First extract the lower XLEN bits of the element.
12483 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12484
12485 // To extract the upper XLEN bits of the vector element, shift the first
12486 // element right by 32 bits and re-extract the lower XLEN bits.
12487 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12488
12489 SDValue ThirtyTwoV =
12490 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12491 DAG.getConstant(32, DL, XLenVT), VL);
12492 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12493 DAG.getUNDEF(VecVT), Mask, VL);
12494 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12495
12496 Results.push_back(
12497 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12498 break;
12499 }
12500 }
12501 break;
12502 }
12503 case ISD::VECREDUCE_ADD:
12504 case ISD::VECREDUCE_AND:
12505 case ISD::VECREDUCE_OR:
12506 case ISD::VECREDUCE_XOR:
12507 case ISD::VECREDUCE_SMAX:
12508 case ISD::VECREDUCE_UMAX:
12509 case ISD::VECREDUCE_SMIN:
12510 case ISD::VECREDUCE_UMIN:
12511 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12512 Results.push_back(V);
12513 break;
12514 case ISD::VP_REDUCE_ADD:
12515 case ISD::VP_REDUCE_AND:
12516 case ISD::VP_REDUCE_OR:
12517 case ISD::VP_REDUCE_XOR:
12518 case ISD::VP_REDUCE_SMAX:
12519 case ISD::VP_REDUCE_UMAX:
12520 case ISD::VP_REDUCE_SMIN:
12521 case ISD::VP_REDUCE_UMIN:
12522 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12523 Results.push_back(V);
12524 break;
12525 case ISD::GET_ROUNDING: {
12526 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12527 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12528 Results.push_back(Res.getValue(0));
12529 Results.push_back(Res.getValue(1));
12530 break;
12531 }
12532 }
12533}
12534
12535/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12536/// which corresponds to it.
12537static unsigned getVecReduceOpcode(unsigned Opc) {
12538 switch (Opc) {
12539 default:
12540 llvm_unreachable("Unhandled binary to transform reduction");
12541 case ISD::ADD:
12542 return ISD::VECREDUCE_ADD;
12543 case ISD::UMAX:
12544 return ISD::VECREDUCE_UMAX;
12545 case ISD::SMAX:
12546 return ISD::VECREDUCE_SMAX;
12547 case ISD::UMIN:
12548 return ISD::VECREDUCE_UMIN;
12549 case ISD::SMIN:
12550 return ISD::VECREDUCE_SMIN;
12551 case ISD::AND:
12552 return ISD::VECREDUCE_AND;
12553 case ISD::OR:
12554 return ISD::VECREDUCE_OR;
12555 case ISD::XOR:
12556 return ISD::VECREDUCE_XOR;
12557 case ISD::FADD:
12558 // Note: This is the associative form of the generic reduction opcode.
12559 return ISD::VECREDUCE_FADD;
12560 }
12561}
12562
12563/// Perform two related transforms whose purpose is to incrementally recognize
12564/// an explode_vector followed by scalar reduction as a vector reduction node.
12565/// This exists to recover from a deficiency in SLP which can't handle
12566/// forests with multiple roots sharing common nodes. In some cases, one
12567/// of the trees will be vectorized, and the other will remain (unprofitably)
12568/// scalarized.
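/// Illustrative example (not from the original source): a fully scalarized
/// sum of a v4i32 value V,
///   add (add (add (extract_elt V, 0), (extract_elt V, 1)),
///            (extract_elt V, 2)),
///       (extract_elt V, 3)
/// is recognized incrementally: the innermost add becomes
/// vecreduce_add (extract_subvector V, [0,2)), and each later step widens the
/// reduced prefix by one element until the whole vector is covered.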
12569static SDValue
12570 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12571 const RISCVSubtarget &Subtarget) {
12572
12573 // This transform needs to run before all integer types have been legalized
12574 // to i64 (so that the vector element type matches the add type), and while
12575 // it's safe to introduce odd sized vector types.
12576 if (DAG.getTargetLoweringInfo().isTypeLegal(N->getValueType(0)))
12577 return SDValue();
12578
12579 // Without V, this transform isn't useful. We could form the (illegal)
12580 // operations and let them be scalarized again, but there's really no point.
12581 if (!Subtarget.hasVInstructions())
12582 return SDValue();
12583
12584 const SDLoc DL(N);
12585 const EVT VT = N->getValueType(0);
12586 const unsigned Opc = N->getOpcode();
12587
12588 // For FADD, we only handle the case with reassociation allowed. We
12589 // could handle strict reduction order, but at the moment, there's no
12590 // known reason to, and the complexity isn't worth it.
12591 // TODO: Handle fminnum and fmaxnum here
12592 if (!VT.isInteger() &&
12593 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12594 return SDValue();
12595
12596 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12597 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12598 "Inconsistent mappings");
12599 SDValue LHS = N->getOperand(0);
12600 SDValue RHS = N->getOperand(1);
12601
12602 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12603 return SDValue();
12604
12605 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12606 std::swap(LHS, RHS);
12607
12608 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12609 !isa<ConstantSDNode>(RHS.getOperand(1)))
12610 return SDValue();
12611
12612 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12613 SDValue SrcVec = RHS.getOperand(0);
12614 EVT SrcVecVT = SrcVec.getValueType();
12615 assert(SrcVecVT.getVectorElementType() == VT);
12616 if (SrcVecVT.isScalableVector())
12617 return SDValue();
12618
12619 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12620 return SDValue();
12621
12622 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12623 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12624 // root of our reduction tree. TODO: We could extend this to any two
12625 // adjacent aligned constant indices if desired.
12626 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12627 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12628 uint64_t LHSIdx =
12629 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12630 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12631 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12632 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12633 DAG.getVectorIdxConstant(0, DL));
12634 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12635 }
12636 }
12637
12638 // Match (binop (reduce (extract_subvector V, 0),
12639 // (extract_vector_elt V, sizeof(SubVec))))
12640 // into a reduction of one more element from the original vector V.
12641 if (LHS.getOpcode() != ReduceOpc)
12642 return SDValue();
12643
12644 SDValue ReduceVec = LHS.getOperand(0);
12645 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12646 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12647 isNullConstant(ReduceVec.getOperand(1)) &&
12648 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12649 // For illegal types (e.g. 3xi32), most will be combined again into a
12650 // wider (hopefully legal) type. If this is a terminal state, we are
12651 // relying on type legalization here to produce something reasonable
12652 // and this lowering quality could probably be improved. (TODO)
12653 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12654 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12655 DAG.getVectorIdxConstant(0, DL));
12656 auto Flags = ReduceVec->getFlags();
12657 Flags.intersectWith(N->getFlags());
12658 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12659 }
12660
12661 return SDValue();
12662}
12663
12664
12665// Try to fold (<bop> x, (reduction.<bop> vec, start))
12666 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12667 const RISCVSubtarget &Subtarget) {
12668 auto BinOpToRVVReduce = [](unsigned Opc) {
12669 switch (Opc) {
12670 default:
12671 llvm_unreachable("Unhandled binary to transform reduction");
12672 case ISD::ADD:
12673 return RISCVISD::VECREDUCE_ADD_VL;
12674 case ISD::UMAX:
12675 return RISCVISD::VECREDUCE_UMAX_VL;
12676 case ISD::SMAX:
12677 return RISCVISD::VECREDUCE_SMAX_VL;
12678 case ISD::UMIN:
12679 return RISCVISD::VECREDUCE_UMIN_VL;
12680 case ISD::SMIN:
12681 return RISCVISD::VECREDUCE_SMIN_VL;
12682 case ISD::AND:
12683 return RISCVISD::VECREDUCE_AND_VL;
12684 case ISD::OR:
12685 return RISCVISD::VECREDUCE_OR_VL;
12686 case ISD::XOR:
12687 return RISCVISD::VECREDUCE_XOR_VL;
12688 case ISD::FADD:
12689 return RISCVISD::VECREDUCE_FADD_VL;
12690 case ISD::FMAXNUM:
12691 return RISCVISD::VECREDUCE_FMAX_VL;
12692 case ISD::FMINNUM:
12693 return RISCVISD::VECREDUCE_FMIN_VL;
12694 }
12695 };
12696
12697 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12698 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12699 isNullConstant(V.getOperand(1)) &&
12700 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12701 };
12702
12703 unsigned Opc = N->getOpcode();
12704 unsigned ReduceIdx;
12705 if (IsReduction(N->getOperand(0), Opc))
12706 ReduceIdx = 0;
12707 else if (IsReduction(N->getOperand(1), Opc))
12708 ReduceIdx = 1;
12709 else
12710 return SDValue();
12711
12712 // Skip FADD if it lacks the reassociation flag that this combine needs.
12713 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12714 return SDValue();
12715
12716 SDValue Extract = N->getOperand(ReduceIdx);
12717 SDValue Reduce = Extract.getOperand(0);
12718 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12719 return SDValue();
12720
12721 SDValue ScalarV = Reduce.getOperand(2);
12722 EVT ScalarVT = ScalarV.getValueType();
12723 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12724 ScalarV.getOperand(0)->isUndef() &&
12725 isNullConstant(ScalarV.getOperand(2)))
12726 ScalarV = ScalarV.getOperand(1);
12727
12728 // Make sure that ScalarV is a splat with VL=1.
12729 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12730 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12731 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12732 return SDValue();
12733
12734 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12735 return SDValue();
12736
12737 // Check that the scalar of ScalarV is the neutral element.
12738 // TODO: Deal with value other than neutral element.
12739 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12740 0))
12741 return SDValue();
12742
12743 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12744 // FIXME: We might be able to improve this if operand 0 is undef.
12745 if (!isNonZeroAVL(Reduce.getOperand(5)))
12746 return SDValue();
12747
12748 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12749
12750 SDLoc DL(N);
12751 SDValue NewScalarV =
12752 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12753 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12754
12755 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12756 if (ScalarVT != ScalarV.getValueType())
12757 NewScalarV =
12758 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12759 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12760
12761 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12762 NewScalarV, Reduce.getOperand(3),
12763 Reduce.getOperand(4), Reduce.getOperand(5)};
12764 SDValue NewReduce =
12765 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12766 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12767 Extract.getOperand(1));
12768}
12769
12770// Optimize (add (shl x, c0), (shl y, c1)) ->
12771 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
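// Worked example (not from the original source): with Zba,
// (add (shl x, 1), (shl y, 3)) has c0 = 1 and c1 = 3, so Diff = 2 and
// Bits = 1; it becomes (shl (sh2add y, x), 1), i.e.
// ((y << 2) + x) << 1 == (x << 1) + (y << 3).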
12772 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12773 const RISCVSubtarget &Subtarget) {
12774 // Perform this optimization only in the zba extension.
12775 if (!Subtarget.hasStdExtZba())
12776 return SDValue();
12777
12778 // Skip for vector types and larger types.
12779 EVT VT = N->getValueType(0);
12780 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12781 return SDValue();
12782
12783 // The two operand nodes must be SHL and have no other use.
12784 SDValue N0 = N->getOperand(0);
12785 SDValue N1 = N->getOperand(1);
12786 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12787 !N0->hasOneUse() || !N1->hasOneUse())
12788 return SDValue();
12789
12790 // Check c0 and c1.
12791 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12792 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12793 if (!N0C || !N1C)
12794 return SDValue();
12795 int64_t C0 = N0C->getSExtValue();
12796 int64_t C1 = N1C->getSExtValue();
12797 if (C0 <= 0 || C1 <= 0)
12798 return SDValue();
12799
12800 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12801 int64_t Bits = std::min(C0, C1);
12802 int64_t Diff = std::abs(C0 - C1);
12803 if (Diff != 1 && Diff != 2 && Diff != 3)
12804 return SDValue();
12805
12806 // Build nodes.
12807 SDLoc DL(N);
12808 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12809 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12810 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
12811 DAG.getConstant(Diff, DL, VT), NS);
12812 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
12813}
12814
12815// Combine a constant select operand into its use:
12816//
12817// (and (select cond, -1, c), x)
12818// -> (select cond, x, (and x, c)) [AllOnes=1]
12819// (or (select cond, 0, c), x)
12820// -> (select cond, x, (or x, c)) [AllOnes=0]
12821// (xor (select cond, 0, c), x)
12822// -> (select cond, x, (xor x, c)) [AllOnes=0]
12823// (add (select cond, 0, c), x)
12824// -> (select cond, x, (add x, c)) [AllOnes=0]
12825// (sub x, (select cond, 0, c))
12826// -> (select cond, x, (sub x, c)) [AllOnes=0]
12827 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12828 SelectionDAG &DAG, bool AllOnes,
12829 const RISCVSubtarget &Subtarget) {
12830 EVT VT = N->getValueType(0);
12831
12832 // Skip vectors.
12833 if (VT.isVector())
12834 return SDValue();
12835
12836 if (!Subtarget.hasConditionalMoveFusion()) {
12837 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12838 if ((!Subtarget.hasStdExtZicond() &&
12839 !Subtarget.hasVendorXVentanaCondOps()) ||
12840 N->getOpcode() != ISD::AND)
12841 return SDValue();
12842
12843 // Maybe harmful when condition code has multiple use.
12844 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12845 return SDValue();
12846
12847 // Maybe harmful when VT is wider than XLen.
12848 if (VT.getSizeInBits() > Subtarget.getXLen())
12849 return SDValue();
12850 }
12851
12852 if ((Slct.getOpcode() != ISD::SELECT &&
12853 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12854 !Slct.hasOneUse())
12855 return SDValue();
12856
12857 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12858 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12859 };
12860
12861 bool SwapSelectOps;
12862 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12863 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12864 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12865 SDValue NonConstantVal;
12866 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12867 SwapSelectOps = false;
12868 NonConstantVal = FalseVal;
12869 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12870 SwapSelectOps = true;
12871 NonConstantVal = TrueVal;
12872 } else
12873 return SDValue();
12874
12875 // Slct is now known to be the desired identity constant when CC is true.
12876 TrueVal = OtherOp;
12877 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12878 // Unless SwapSelectOps says the condition should be false.
12879 if (SwapSelectOps)
12880 std::swap(TrueVal, FalseVal);
12881
12882 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12883 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12884 {Slct.getOperand(0), Slct.getOperand(1),
12885 Slct.getOperand(2), TrueVal, FalseVal});
12886
12887 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12888 {Slct.getOperand(0), TrueVal, FalseVal});
12889}
12890
12891// Attempt combineSelectAndUse on each operand of a commutative operator N.
12892 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12893 bool AllOnes,
12894 const RISCVSubtarget &Subtarget) {
12895 SDValue N0 = N->getOperand(0);
12896 SDValue N1 = N->getOperand(1);
12897 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12898 return Result;
12899 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12900 return Result;
12901 return SDValue();
12902}
12903
12904// Transform (add (mul x, c0), c1) ->
12905// (add (mul (add x, c1/c0), c0), c1%c0).
12906// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12907// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12908// to an infinite loop in DAGCombine if transformed.
12909// Or transform (add (mul x, c0), c1) ->
12910// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12911// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12912// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12913// lead to an infinite loop in DAGCombine if transformed.
12914// Or transform (add (mul x, c0), c1) ->
12915// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12916// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12917// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12918// lead to an infinite loop in DAGCombine if transformed.
12919// Or transform (add (mul x, c0), c1) ->
12920// (mul (add x, c1/c0), c0).
12921// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
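// Worked example (not from the original source): for (add (mul x, 73), 7919),
// c1 = 7919 is not simm12, but c1/c0 = 108 and c1%c0 = 35 both are, and
// c0*(c1/c0) = 7884 is not simm12, so the first form applies:
// (add (mul (add x, 108), 73), 35).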
12922 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12923 const RISCVSubtarget &Subtarget) {
12924 // Skip for vector types and larger types.
12925 EVT VT = N->getValueType(0);
12926 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12927 return SDValue();
12928 // The first operand node must be a MUL and has no other use.
12929 SDValue N0 = N->getOperand(0);
12930 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12931 return SDValue();
12932 // Check if c0 and c1 match above conditions.
12933 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12934 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12935 if (!N0C || !N1C)
12936 return SDValue();
12937 // If N0C has multiple uses it's possible one of the cases in
12938 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12939 // in an infinite loop.
12940 if (!N0C->hasOneUse())
12941 return SDValue();
12942 int64_t C0 = N0C->getSExtValue();
12943 int64_t C1 = N1C->getSExtValue();
12944 int64_t CA, CB;
12945 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12946 return SDValue();
12947 // Search for proper CA (non-zero) and CB that both are simm12.
12948 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12949 !isInt<12>(C0 * (C1 / C0))) {
12950 CA = C1 / C0;
12951 CB = C1 % C0;
12952 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12953 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12954 CA = C1 / C0 + 1;
12955 CB = C1 % C0 - C0;
12956 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12957 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12958 CA = C1 / C0 - 1;
12959 CB = C1 % C0 + C0;
12960 } else
12961 return SDValue();
12962 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12963 SDLoc DL(N);
12964 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
12965 DAG.getConstant(CA, DL, VT));
12966 SDValue New1 =
12967 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
12968 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
12969}
12970
12971// add (zext, zext) -> zext (add (zext, zext))
12972// sub (zext, zext) -> sext (sub (zext, zext))
12973// mul (zext, zext) -> zext (mul (zext, zext))
12974// sdiv (zext, zext) -> zext (sdiv (zext, zext))
12975// udiv (zext, zext) -> zext (udiv (zext, zext))
12976// srem (zext, zext) -> zext (srem (zext, zext))
12977// urem (zext, zext) -> zext (urem (zext, zext))
12978//
12979 // where the sum of the extend widths match, and the range of the bin op
12980// fits inside the width of the narrower bin op. (For profitability on rvv, we
12981// use a power of two for both inner and outer extend.)
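// Illustrative example (not from the original source): for v4i32
// (add (zext v4i8 a), (zext v4i8 b)), the i8 sources are narrower than half
// of i32, so this becomes
// (zext (add (zext a to v4i16), (zext b to v4i16)) to v4i32); for sub the
// outer extend is a sext instead.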
12982 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
12983 
12984 EVT VT = N->getValueType(0);
12985 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
12986 return SDValue();
12987
12988 SDValue N0 = N->getOperand(0);
12989 SDValue N1 = N->getOperand(1);
12990 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
12991 return SDValue();
12992 if (!N0.hasOneUse() || !N1.hasOneUse())
12993 return SDValue();
12994
12995 SDValue Src0 = N0.getOperand(0);
12996 SDValue Src1 = N1.getOperand(0);
12997 EVT SrcVT = Src0.getValueType();
12998 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
12999 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13000 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13001 return SDValue();
13002
13003 LLVMContext &C = *DAG.getContext();
13004 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13005 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13006
13007 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13008 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13009
13010 // Src0 and Src1 are zero extended, so they're always positive if signed.
13011 //
13012 // sub can produce a negative from two positive operands, so it needs sign
13013 // extended. Other nodes produce a positive from two positive operands, so
13014 // zero extend instead.
13015 unsigned OuterExtend =
13016 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13017
13018 return DAG.getNode(
13019 OuterExtend, SDLoc(N), VT,
13020 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13021}
13022
13023 // Try to turn (add (xor bool, 1), -1) into (neg bool).
13024 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13025 SDValue N0 = N->getOperand(0);
13026 SDValue N1 = N->getOperand(1);
13027 EVT VT = N->getValueType(0);
13028 SDLoc DL(N);
13029
13030 // RHS should be -1.
13031 if (!isAllOnesConstant(N1))
13032 return SDValue();
13033
13034 // Look for (xor X, 1).
13035 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13036 return SDValue();
13037
13038 // First xor input should be 0 or 1.
13039 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13040 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13041 return SDValue();
13042
13043 // Emit a negate of the setcc.
13044 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13045 N0.getOperand(0));
13046}
13047
13048 static SDValue performADDCombine(SDNode *N,
13049 TargetLowering::DAGCombinerInfo &DCI,
13050 const RISCVSubtarget &Subtarget) {
13051 SelectionDAG &DAG = DCI.DAG;
13052 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13053 return V;
13054 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13055 return V;
13056 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
13057 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13058 return V;
13059 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13060 return V;
13061 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13062 return V;
13063 if (SDValue V = combineBinOpOfZExt(N, DAG))
13064 return V;
13065
13066 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13067 // (select lhs, rhs, cc, x, (add x, y))
13068 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13069}
13070
13071// Try to turn a sub boolean RHS and constant LHS into an addi.
13072 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13073 SDValue N0 = N->getOperand(0);
13074 SDValue N1 = N->getOperand(1);
13075 EVT VT = N->getValueType(0);
13076 SDLoc DL(N);
13077
13078 // Require a constant LHS.
13079 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13080 if (!N0C)
13081 return SDValue();
13082
13083 // All our optimizations involve subtracting 1 from the immediate and forming
13084 // an ADDI. Make sure the new immediate is valid for an ADDI.
13085 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13086 if (!ImmValMinus1.isSignedIntN(12))
13087 return SDValue();
13088
13089 SDValue NewLHS;
13090 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13091 // (sub constant, (setcc x, y, eq/neq)) ->
13092 // (add (setcc x, y, neq/eq), constant - 1)
13093 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13094 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13095 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13096 return SDValue();
13097 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13098 NewLHS =
13099 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13100 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13101 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13102 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13103 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13104 NewLHS = N1.getOperand(0);
13105 } else
13106 return SDValue();
13107
13108 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13109 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13110}
13111
13112 // Looks for (sub (shl X, 8), X) where only the least significant bit of each
13113 // byte of X can be non-zero. Replace with orc.b.
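// Worked example (not from the original source): for X = 0x00010001 only the
// byte LSBs are set, and (X << 8) - X = 0x01000100 - 0x00010001 = 0x00ff00ff,
// which is exactly orc.b(X) (every non-zero byte becomes 0xff).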
13114 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
13115 const RISCVSubtarget &Subtarget) {
13116 if (!Subtarget.hasStdExtZbb())
13117 return SDValue();
13118
13119 EVT VT = N->getValueType(0);
13120
13121 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
13122 return SDValue();
13123
13124 SDValue N0 = N->getOperand(0);
13125 SDValue N1 = N->getOperand(1);
13126
13127 if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
13128 return SDValue();
13129
13130 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
13131 if (!ShAmtC || ShAmtC->getZExtValue() != 8)
13132 return SDValue();
13133
13134 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
13135 if (!DAG.MaskedValueIsZero(N1, Mask))
13136 return SDValue();
13137
13138 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
13139}
13140
13141 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13142 const RISCVSubtarget &Subtarget) {
13143 if (SDValue V = combineSubOfBoolean(N, DAG))
13144 return V;
13145
13146 EVT VT = N->getValueType(0);
13147 SDValue N0 = N->getOperand(0);
13148 SDValue N1 = N->getOperand(1);
13149 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13150 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13151 isNullConstant(N1.getOperand(1))) {
13152 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13153 if (CCVal == ISD::SETLT) {
13154 SDLoc DL(N);
13155 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13156 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13157 DAG.getConstant(ShAmt, DL, VT));
13158 }
13159 }
13160
13161 if (SDValue V = combineBinOpOfZExt(N, DAG))
13162 return V;
13163 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
13164 return V;
13165
13166 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13167 // (select lhs, rhs, cc, x, (sub x, y))
13168 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13169}
13170
13171// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13172// Legalizing setcc can introduce xors like this. Doing this transform reduces
13173// the number of xors and may allow the xor to fold into a branch condition.
13174 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13175 SDValue N0 = N->getOperand(0);
13176 SDValue N1 = N->getOperand(1);
13177 bool IsAnd = N->getOpcode() == ISD::AND;
13178
13179 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13180 return SDValue();
13181
13182 if (!N0.hasOneUse() || !N1.hasOneUse())
13183 return SDValue();
13184
13185 SDValue N01 = N0.getOperand(1);
13186 SDValue N11 = N1.getOperand(1);
13187
13188 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13189 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13190 // operation is And, allow one of the Xors to use -1.
13191 if (isOneConstant(N01)) {
13192 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13193 return SDValue();
13194 } else if (isOneConstant(N11)) {
13195 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13196 if (!(IsAnd && isAllOnesConstant(N01)))
13197 return SDValue();
13198 } else
13199 return SDValue();
13200
13201 EVT VT = N->getValueType(0);
13202
13203 SDValue N00 = N0.getOperand(0);
13204 SDValue N10 = N1.getOperand(0);
13205
13206 // The LHS of the xors needs to be 0/1.
13207 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13208 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13209 return SDValue();
13210
13211 // Invert the opcode and insert a new xor.
13212 SDLoc DL(N);
13213 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13214 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13215 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13216}
13217
13218// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
13219// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
13220 // value to an unsigned value. This will be lowered to vmax and a series of
13221// vnclipu instructions later. This can be extended to other truncated types
13222// other than i8 by replacing 256 and 255 with the equivalent constants for the
13223// type.
13224 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
13225 EVT VT = N->getValueType(0);
13226 SDValue N0 = N->getOperand(0);
13227 EVT SrcVT = N0.getValueType();
13228
13229 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13230 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
13231 return SDValue();
13232
13233 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
13234 return SDValue();
13235
13236 SDValue Cond = N0.getOperand(0);
13237 SDValue True = N0.getOperand(1);
13238 SDValue False = N0.getOperand(2);
13239
13240 if (Cond.getOpcode() != ISD::SETCC)
13241 return SDValue();
13242
13243 // FIXME: Support the version of this pattern with the select operands
13244 // swapped.
13245 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13246 if (CCVal != ISD::SETULT)
13247 return SDValue();
13248
13249 SDValue CondLHS = Cond.getOperand(0);
13250 SDValue CondRHS = Cond.getOperand(1);
13251
13252 if (CondLHS != True)
13253 return SDValue();
13254
13255 unsigned ScalarBits = VT.getScalarSizeInBits();
13256
13257 // FIXME: Support other constants.
13258 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
13259 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
13260 return SDValue();
13261
13262 if (False.getOpcode() != ISD::SIGN_EXTEND)
13263 return SDValue();
13264
13265 False = False.getOperand(0);
13266
13267 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
13268 return SDValue();
13269
13270 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
13271 if (!FalseRHSC || !FalseRHSC->isZero())
13272 return SDValue();
13273
13274 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
13275 if (CCVal2 != ISD::SETGT)
13276 return SDValue();
13277
13278 // Emit the signed to unsigned saturation pattern.
13279 SDLoc DL(N);
13280 SDValue Max =
13281 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
13282 SDValue Min =
13283 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
13284 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
13285 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
13286}
13287
13288 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13289 const RISCVSubtarget &Subtarget) {
13290 SDValue N0 = N->getOperand(0);
13291 EVT VT = N->getValueType(0);
13292
13293 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13294 // extending X. This is safe since we only need the LSB after the shift and
13295 // shift amounts larger than 31 would produce poison. If we wait until
13296 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13297 // to use a BEXT instruction.
13298 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13299 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13300 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13301 SDLoc DL(N0);
13302 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13303 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13304 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13305 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13306 }
13307
13308 return combineTruncSelectToSMaxUSat(N, DAG);
13309}
13310
13311 // Combines two comparison operations and a logic operation into one selection
13312 // operation (min, max) and a logic operation. Returns the newly constructed
13313 // node if the conditions for the optimization are satisfied.
13314 static SDValue performANDCombine(SDNode *N,
13315 TargetLowering::DAGCombinerInfo &DCI,
13316 const RISCVSubtarget &Subtarget) {
13317 SelectionDAG &DAG = DCI.DAG;
13318
13319 SDValue N0 = N->getOperand(0);
13320 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13321 // extending X. This is safe since we only need the LSB after the shift and
13322 // shift amounts larger than 31 would produce poison. If we wait until
13323 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13324 // to use a BEXT instruction.
13325 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13326 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13327 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13328 N0.hasOneUse()) {
13329 SDLoc DL(N);
13330 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13331 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13332 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13333 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13334 DAG.getConstant(1, DL, MVT::i64));
13335 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13336 }
13337
13338 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13339 return V;
13340 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13341 return V;
13342
13343 if (DCI.isAfterLegalizeDAG())
13344 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13345 return V;
13346
13347 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13348 // (select lhs, rhs, cc, x, (and x, y))
13349 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13350}
13351
13352// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13353// FIXME: Generalize to other binary operators with same operand.
13354 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13355 SelectionDAG &DAG) {
13356 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13357
13358 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13359 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13360 !N0.hasOneUse() || !N1.hasOneUse())
13361 return SDValue();
13362
13363 // Should have the same condition.
13364 SDValue Cond = N0.getOperand(1);
13365 if (Cond != N1.getOperand(1))
13366 return SDValue();
13367
13368 SDValue TrueV = N0.getOperand(0);
13369 SDValue FalseV = N1.getOperand(0);
13370
13371 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13372 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13373 !isOneConstant(TrueV.getOperand(1)) ||
13374 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13375 return SDValue();
13376
13377 EVT VT = N->getValueType(0);
13378 SDLoc DL(N);
13379
13380 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13381 Cond);
13382 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13383 Cond);
13384 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13385 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13386}
13387
13388 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13389 const RISCVSubtarget &Subtarget) {
13390 SelectionDAG &DAG = DCI.DAG;
13391
13392 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13393 return V;
13394 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13395 return V;
13396
13397 if (DCI.isAfterLegalizeDAG())
13398 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13399 return V;
13400
13401 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13402 // We may be able to pull a common operation out of the true and false value.
13403 SDValue N0 = N->getOperand(0);
13404 SDValue N1 = N->getOperand(1);
13405 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13406 return V;
13407 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13408 return V;
13409
13410 // fold (or (select cond, 0, y), x) ->
13411 // (select cond, x, (or x, y))
13412 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13413}
13414
13415 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13416 const RISCVSubtarget &Subtarget) {
13417 SDValue N0 = N->getOperand(0);
13418 SDValue N1 = N->getOperand(1);
13419
13420 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13421 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13422 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13423 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13424 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13425 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13426 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13427 SDLoc DL(N);
13428 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13429 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13430 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13431 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13432 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13433 }
13434
13435 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13436 // NOTE: Assumes ROL being legal means ROLW is legal.
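// Editorial check (not from the original source): (sllw 1, x) sets only bit x
// of the 32-bit result, so xor-ing with -1 clears exactly that bit; rotating
// the 32-bit value ~1 (0xfffffffe) left by x likewise leaves a single zero at
// bit x, so (rolw ~1, x) yields the same sign-extended value.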
13437 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13438 if (N0.getOpcode() == RISCVISD::SLLW &&
13439 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13440 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13441 SDLoc DL(N);
13442 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13443 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13444 }
13445
13446 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13447 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13448 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13449 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13450 if (ConstN00 && CC == ISD::SETLT) {
13451 EVT VT = N0.getValueType();
13452 SDLoc DL(N0);
13453 const APInt &Imm = ConstN00->getAPIntValue();
13454 if ((Imm + 1).isSignedIntN(12))
13455 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13456 DAG.getConstant(Imm + 1, DL, VT), CC);
13457 }
13458 }
13459
13460 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13461 return V;
13462 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13463 return V;
13464
13465 // fold (xor (select cond, 0, y), x) ->
13466 // (select cond, x, (xor x, y))
13467 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13468}
13469
13470// Try to expand a scalar multiply to a faster sequence.
13471 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13472 TargetLowering::DAGCombinerInfo &DCI,
13473 const RISCVSubtarget &Subtarget) {
13474
13475 EVT VT = N->getValueType(0);
13476
13477 // LI + MUL is usually smaller than the alternative sequence.
13478 if (DAG.getMachineFunction().getFunction().hasMinSize())
13479 return SDValue();
13480
13481 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13482 return SDValue();
13483
13484 if (VT != Subtarget.getXLenVT())
13485 return SDValue();
13486
13487 const bool HasShlAdd =
13488 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
13489
13490 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13491 if (!CNode)
13492 return SDValue();
13493 uint64_t MulAmt = CNode->getZExtValue();
13494
13495 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
13496 // We're adding additional uses of X here, and in principle, we should be freezing
13497 // X before doing so. However, adding freeze here causes real regressions, and no
13498 // other target properly freezes X in these cases either.
13499 SDValue X = N->getOperand(0);
13500
13501 if (HasShlAdd) {
13502 for (uint64_t Divisor : {3, 5, 9}) {
13503 if (MulAmt % Divisor != 0)
13504 continue;
13505 uint64_t MulAmt2 = MulAmt / Divisor;
13506 // 3/5/9 * 2^N -> shl (shXadd X, X), N
13507 if (isPowerOf2_64(MulAmt2)) {
13508 SDLoc DL(N);
13509 SDValue X = N->getOperand(0);
13510 // Put the shift first if we can fold a zext into the
13511 // shift forming a slli.uw.
13512 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13513 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13514 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13515 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13516 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13517 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
13518 Shl);
13519 }
13520 // Otherwise, put the shl second so that it can fold with following
13521 // instructions (e.g. sext or add).
13522 SDValue Mul359 =
13523 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13524 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13525 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13526 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13527 }
13528
13529 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13530 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13531 SDLoc DL(N);
13532 SDValue Mul359 =
13533 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13534 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13535 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13536 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13537 Mul359);
13538 }
13539 }
13540
13541 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13542 // shXadd. First check if this is a sum of two powers of 2 because that's
13543 // easy. Then count how many zeros are up to the first bit.
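// Worked example (not from the original source): MulAmt = 34 = 32 + 2 (no
// 3/5/9 factor handled above) gives ScaleShift = 1 and ShiftAmt = 5, so the
// result is (sh1add X, (shl X, 5)) = 2*X + 32*X = 34*X.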
13544 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13545 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13546 if (ScaleShift >= 1 && ScaleShift < 4) {
13547 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13548 SDLoc DL(N);
13549 SDValue Shift1 =
13550 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13551 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13552 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13553 }
13554 }
13555
13556 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13557 // This is the two instruction form, there are also three instruction
13558 // variants we could implement. e.g.
13559 // (2^(1,2,3) * 3,5,9 + 1) << C2
13560 // 2^(C1>3) * 3,5,9 +/- 1
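// Worked example (not from the original source): MulAmt = 11 gives C = 10,
// which matches Divisor = 5 with TZ = 1, so the result is
// (sh1add (sh2add X, X), X) = 2*(5*X) + X = 11*X.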
13561 for (uint64_t Divisor : {3, 5, 9}) {
13562 uint64_t C = MulAmt - 1;
13563 if (C <= Divisor)
13564 continue;
13565 unsigned TZ = llvm::countr_zero(C);
13566 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13567 SDLoc DL(N);
13568 SDValue Mul359 =
13569 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13570 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13571 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13572 DAG.getConstant(TZ, DL, VT), X);
13573 }
13574 }
13575
13576 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13577 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13578 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13579 if (ScaleShift >= 1 && ScaleShift < 4) {
13580 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13581 SDLoc DL(N);
13582 SDValue Shift1 =
13583 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13584 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13585 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13586 DAG.getConstant(ScaleShift, DL, VT), X));
13587 }
13588 }
13589
13590 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13591 for (uint64_t Offset : {3, 5, 9}) {
13592 if (isPowerOf2_64(MulAmt + Offset)) {
13593 SDLoc DL(N);
13594 SDValue Shift1 =
13595 DAG.getNode(ISD::SHL, DL, VT, X,
13596 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13597 SDValue Mul359 =
13598 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13599 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
13600 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13601 }
13602 }
13603 }
13604
13605 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
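// Worked example (not from the original source): MulAmt = 112 has
// MulAmtLowBit = 16 and 112 + 16 = 128 is a power of two, so the result is
// (sub (shl X, 7), (shl X, 4)) = 128*X - 16*X = 112*X.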
13606 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
13607 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
13608 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
13609 SDLoc DL(N);
13610 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13611 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
13612 SDValue Shift2 =
13613 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13614 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
13615 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
13616 }
13617
13618 return SDValue();
13619}
13620
13621// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
13622// (bitcast (sra (v2Xi16 (bitcast X)), 15))
13623// Same for other equivalent types with other equivalent constants.
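// Editorial check (not from the original source) for one i32 element X: the
// AND keeps bit 15 of X in bit 0 and bit 31 of X in bit 16, and multiplying
// by 0xffff turns each of those bits into an all-ones i16 half, which is
// exactly what an arithmetic shift right by 15 of each i16 half of X yields.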
13624 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
13625 EVT VT = N->getValueType(0);
13626 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13627
13628 // Do this for legal vectors unless they are i1 or i8 vectors.
13629 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
13630 return SDValue();
13631
13632 if (N->getOperand(0).getOpcode() != ISD::AND ||
13633 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
13634 return SDValue();
13635
13636 SDValue And = N->getOperand(0);
13637 SDValue Srl = And.getOperand(0);
13638
13639 APInt V1, V2, V3;
13640 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
13641 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
13642 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
13643 return SDValue();
13644
13645 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
13646 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
13647 V3 != (HalfSize - 1))
13648 return SDValue();
13649
13650 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
13651 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
13652 VT.getVectorElementCount() * 2);
13653 SDLoc DL(N);
13654 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
13655 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
13656 DAG.getConstant(HalfSize - 1, DL, HalfVT));
13657 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
13658}
13659
13660 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13661 TargetLowering::DAGCombinerInfo &DCI,
13662 const RISCVSubtarget &Subtarget) {
13663 EVT VT = N->getValueType(0);
13664 if (!VT.isVector())
13665 return expandMul(N, DAG, DCI, Subtarget);
13666
13667 SDLoc DL(N);
13668 SDValue N0 = N->getOperand(0);
13669 SDValue N1 = N->getOperand(1);
13670 SDValue MulOper;
13671 unsigned AddSubOpc;
13672
13673 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13674 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13675 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13676 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13677 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13678 AddSubOpc = V->getOpcode();
13679 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13680 SDValue Opnd = V->getOperand(1);
13681 MulOper = V->getOperand(0);
13682 if (AddSubOpc == ISD::SUB)
13683 std::swap(Opnd, MulOper);
13684 if (isOneOrOneSplat(Opnd))
13685 return true;
13686 }
13687 return false;
13688 };
13689
13690 if (IsAddSubWith1(N0)) {
13691 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13692 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13693 }
13694
13695 if (IsAddSubWith1(N1)) {
13696 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13697 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13698 }
13699
13700 if (SDValue V = combineBinOpOfZExt(N, DAG))
13701 return V;
13702
13703 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
13704 return V;
13705
13706 return SDValue();
13707}
13708
13709 /// Because indexed load/store instructions zero-extend their indices, try to
13710 /// narrow the type of the index operand.
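/// Illustrative example (not from the original source): an index built as
/// (shl (zext vXi8 x to vXi64), 2) only ever needs 8 + 2 = 10 bits, so it can
/// be rebuilt as (shl (zext x to vXi16), 2) and the narrower index type used.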
13711static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13712 if (isIndexTypeSigned(IndexType))
13713 return false;
13714
13715 if (!N->hasOneUse())
13716 return false;
13717
13718 EVT VT = N.getValueType();
13719 SDLoc DL(N);
13720
13721 // In general, what we're doing here is seeing if we can sink a truncate to
13722 // a smaller element type into the expression tree building our index.
13723 // TODO: We can generalize this and handle a bunch more cases if useful.
13724
13725 // Narrow a buildvector to the narrowest element type. This requires less
13726 // work and less register pressure at high LMUL, and creates smaller constants
13727 // which may be cheaper to materialize.
13728 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13729 KnownBits Known = DAG.computeKnownBits(N);
13730 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13731 LLVMContext &C = *DAG.getContext();
13732 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13733 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13734 N = DAG.getNode(ISD::TRUNCATE, DL,
13735 VT.changeVectorElementType(ResultVT), N);
13736 return true;
13737 }
13738 }
13739
13740 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13741 if (N.getOpcode() != ISD::SHL)
13742 return false;
13743
13744 SDValue N0 = N.getOperand(0);
13745 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13746 N0.getOpcode() != RISCVISD::VZEXT_VL)
13747 return false;
13748 if (!N0->hasOneUse())
13749 return false;
13750
13751 APInt ShAmt;
13752 SDValue N1 = N.getOperand(1);
13753 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13754 return false;
13755
13756 SDValue Src = N0.getOperand(0);
13757 EVT SrcVT = Src.getValueType();
13758 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13759 unsigned ShAmtV = ShAmt.getZExtValue();
13760 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13761 NewElen = std::max(NewElen, 8U);
13762
13763 // Skip if NewElen is not narrower than the original extended type.
13764 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13765 return false;
13766
13767 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13768 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13769
13770 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13771 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13772 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13773 return true;
13774}
13775
13776// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13777// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13778// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13779// can become a sext.w instead of a shift pair.
13780 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13781 const RISCVSubtarget &Subtarget) {
13782 SDValue N0 = N->getOperand(0);
13783 SDValue N1 = N->getOperand(1);
13784 EVT VT = N->getValueType(0);
13785 EVT OpVT = N0.getValueType();
13786
13787 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13788 return SDValue();
13789
13790 // RHS needs to be a constant.
13791 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13792 if (!N1C)
13793 return SDValue();
13794
13795 // LHS needs to be (and X, 0xffffffff).
13796 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13797 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13798 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13799 return SDValue();
13800
13801 // Looking for an equality compare.
13802 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13803 if (!isIntEqualitySetCC(Cond))
13804 return SDValue();
13805
13806 // Don't do this if the sign bit is provably zero, it will be turned back into
13807 // an AND.
13808 APInt SignMask = APInt::getOneBitSet(64, 31);
13809 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13810 return SDValue();
13811
13812 const APInt &C1 = N1C->getAPIntValue();
13813
13814 SDLoc dl(N);
13815 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13816 // to be equal.
13817 if (C1.getActiveBits() > 32)
13818 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13819
13820 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13821 N0.getOperand(0), DAG.getValueType(MVT::i32));
13822 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13823 dl, OpVT), Cond);
13824}
13825
13826static SDValue
13827 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13828 const RISCVSubtarget &Subtarget) {
13829 SDValue Src = N->getOperand(0);
13830 EVT VT = N->getValueType(0);
13831
13832 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13833 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13834 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13835 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13836 Src.getOperand(0));
13837
13838 return SDValue();
13839}
13840
13841namespace {
13842// Forward declaration of the structure holding the necessary information to
13843// apply a combine.
13844struct CombineResult;
13845
13846enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13847/// Helper class for folding sign/zero extensions.
13848/// In particular, this class is used for the following combines:
13849/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13850/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13851/// mul | mul_vl -> vwmul(u) | vwmul_su
13852/// shl | shl_vl -> vwsll
13853/// fadd -> vfwadd | vfwadd_w
13854/// fsub -> vfwsub | vfwsub_w
13855/// fmul -> vfwmul
13856/// An object of this class represents an operand of the operation we want to
13857/// combine.
13858/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13859/// NodeExtensionHelper for `a` and one for `b`.
13860///
13861/// This class abstracts away how the extension is materialized and
13862/// how its number of users affect the combines.
13863///
13864/// In particular:
13865/// - VWADD_W is conceptually == add(op0, sext(op1))
13866/// - VWADDU_W == add(op0, zext(op1))
13867/// - VWSUB_W == sub(op0, sext(op1))
13868/// - VWSUBU_W == sub(op0, zext(op1))
13869/// - VFWADD_W == fadd(op0, fpext(op1))
13870/// - VFWSUB_W == fsub(op0, fpext(op1))
13871/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13872/// zext|sext(smaller_value).
13873struct NodeExtensionHelper {
13874 /// Records if this operand is like being zero extended.
13875 bool SupportsZExt;
13876 /// Records if this operand is like being sign extended.
13877 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13878 /// instance, a splat constant (e.g., 3), would support being both sign and
13879 /// zero extended.
13880 bool SupportsSExt;
13881 /// Records if this operand is like being floating-point extended.
13882 bool SupportsFPExt;
13883 /// This boolean captures whether we care if this operand would still be
13884 /// around after the folding happens.
13885 bool EnforceOneUse;
13886 /// Original value that this NodeExtensionHelper represents.
13887 SDValue OrigOperand;
13888
13889 /// Get the value feeding the extension or the value itself.
13890 /// E.g., for zext(a), this would return a.
13891 SDValue getSource() const {
13892 switch (OrigOperand.getOpcode()) {
13893 case ISD::ZERO_EXTEND:
13894 case ISD::SIGN_EXTEND:
13895 case RISCVISD::VSEXT_VL:
13896 case RISCVISD::VZEXT_VL:
13897 case RISCVISD::FP_EXTEND_VL:
13898 return OrigOperand.getOperand(0);
13899 default:
13900 return OrigOperand;
13901 }
13902 }
13903
13904 /// Check if this instance represents a splat.
13905 bool isSplat() const {
13906 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13907 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13908 }
13909
13910 /// Get the extended opcode.
13911 unsigned getExtOpc(ExtKind SupportsExt) const {
13912 switch (SupportsExt) {
13913 case ExtKind::SExt:
13914 return RISCVISD::VSEXT_VL;
13915 case ExtKind::ZExt:
13916 return RISCVISD::VZEXT_VL;
13917 case ExtKind::FPExt:
13918 return RISCVISD::FP_EXTEND_VL;
13919 }
13920 llvm_unreachable("Unknown ExtKind enum");
13921 }
13922
13923 /// Get or create a value that can feed \p Root with the given extension \p
13924 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the original
13925 /// operand. \see ::getSource().
13926 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13927 const RISCVSubtarget &Subtarget,
13928 std::optional<ExtKind> SupportsExt) const {
13929 if (!SupportsExt.has_value())
13930 return OrigOperand;
13931
13932 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13933
13934 SDValue Source = getSource();
13935 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13936 if (Source.getValueType() == NarrowVT)
13937 return Source;
13938
13939 unsigned ExtOpc = getExtOpc(*SupportsExt);
13940
13941 // If we need an extension, we should be changing the type.
13942 SDLoc DL(OrigOperand);
13943 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13944 switch (OrigOperand.getOpcode()) {
13945 case ISD::ZERO_EXTEND:
13946 case ISD::SIGN_EXTEND:
13947 case RISCVISD::VSEXT_VL:
13948 case RISCVISD::VZEXT_VL:
13949 case RISCVISD::FP_EXTEND_VL:
13950 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13951 case ISD::SPLAT_VECTOR:
13952 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
13954 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13955 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13956 case RISCVISD::VFMV_V_F_VL:
13957 Source = Source.getOperand(1);
13958 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
13959 Source = Source.getOperand(0);
13960 assert(Source.getValueType() == NarrowVT.getVectorElementType());
13961 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
13962 DAG.getUNDEF(NarrowVT), Source, VL);
13963 default:
13964 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13965 // and that operand should already have the right NarrowVT so no
13966 // extension should be required at this point.
13967 llvm_unreachable("Unsupported opcode");
13968 }
13969 }
13970
13971 /// Helper function to get the narrow type for \p Root.
13972 /// The narrow type is the type of \p Root where we divided the size of each
13973 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
13974 /// \pre Both the narrow type and the original type should be legal.
13975 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13976 MVT VT = Root->getSimpleValueType(0);
13977
13978 // Determine the narrow size.
13979 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13980
13981 MVT EltVT = SupportsExt == ExtKind::FPExt
13982 ? MVT::getFloatingPointVT(NarrowSize)
13983 : MVT::getIntegerVT(NarrowSize);
13984
13985 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13986 "Trying to extend something we can't represent");
13987 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
13988 return NarrowVT;
13989 }
13990
13991 /// Get the opcode to materialize:
13992 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13993 static unsigned getSExtOpcode(unsigned Opcode) {
13994 switch (Opcode) {
13995 case ISD::ADD:
13996 case RISCVISD::ADD_VL:
13997 case RISCVISD::VWADD_W_VL:
13998 case RISCVISD::VWADDU_W_VL:
13999 case ISD::OR:
14000 return RISCVISD::VWADD_VL;
14001 case ISD::SUB:
14002 case RISCVISD::SUB_VL:
14003 case RISCVISD::VWSUB_W_VL:
14004 case RISCVISD::VWSUBU_W_VL:
14005 return RISCVISD::VWSUB_VL;
14006 case ISD::MUL:
14007 case RISCVISD::MUL_VL:
14008 return RISCVISD::VWMUL_VL;
14009 default:
14010 llvm_unreachable("Unexpected opcode");
14011 }
14012 }
14013
14014 /// Get the opcode to materialize:
14015 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14016 static unsigned getZExtOpcode(unsigned Opcode) {
14017 switch (Opcode) {
14018 case ISD::ADD:
14019 case RISCVISD::ADD_VL:
14020 case RISCVISD::VWADD_W_VL:
14021 case RISCVISD::VWADDU_W_VL:
14022 case ISD::OR:
14023 return RISCVISD::VWADDU_VL;
14024 case ISD::SUB:
14025 case RISCVISD::SUB_VL:
14026 case RISCVISD::VWSUB_W_VL:
14027 case RISCVISD::VWSUBU_W_VL:
14028 return RISCVISD::VWSUBU_VL;
14029 case ISD::MUL:
14030 case RISCVISD::MUL_VL:
14031 return RISCVISD::VWMULU_VL;
14032 case ISD::SHL:
14033 case RISCVISD::SHL_VL:
14034 return RISCVISD::VWSLL_VL;
14035 default:
14036 llvm_unreachable("Unexpected opcode");
14037 }
14038 }
14039
14040 /// Get the opcode to materialize:
14041 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14042 static unsigned getFPExtOpcode(unsigned Opcode) {
14043 switch (Opcode) {
14044 case RISCVISD::FADD_VL:
14045 case RISCVISD::VFWADD_W_VL:
14046 return RISCVISD::VFWADD_VL;
14047 case RISCVISD::FSUB_VL:
14048 case RISCVISD::VFWSUB_W_VL:
14049 return RISCVISD::VFWSUB_VL;
14050 case RISCVISD::FMUL_VL:
14051 return RISCVISD::VFWMUL_VL;
14052 case RISCVISD::VFMADD_VL:
14053 return RISCVISD::VFWMADD_VL;
14054 case RISCVISD::VFMSUB_VL:
14055 return RISCVISD::VFWMSUB_VL;
14056 case RISCVISD::VFNMADD_VL:
14057 return RISCVISD::VFWNMADD_VL;
14058 case RISCVISD::VFNMSUB_VL:
14059 return RISCVISD::VFWNMSUB_VL;
14060 default:
14061 llvm_unreachable("Unexpected opcode");
14062 }
14063 }
14064
14065 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14066 /// newOpcode(a, b).
14067 static unsigned getSUOpcode(unsigned Opcode) {
14068 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14069 "SU is only supported for MUL");
14070 return RISCVISD::VWMULSU_VL;
14071 }
14072
14073 /// Get the opcode to materialize
14074 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
14075 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14076 switch (Opcode) {
14077 case ISD::ADD:
14078 case RISCVISD::ADD_VL:
14079 case ISD::OR:
14080 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
14081 : RISCVISD::VWADDU_W_VL;
14082 case ISD::SUB:
14083 case RISCVISD::SUB_VL:
14084 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
14085 : RISCVISD::VWSUBU_W_VL;
14086 case RISCVISD::FADD_VL:
14087 return RISCVISD::VFWADD_W_VL;
14088 case RISCVISD::FSUB_VL:
14089 return RISCVISD::VFWSUB_W_VL;
14090 default:
14091 llvm_unreachable("Unexpected opcode");
14092 }
14093 }
14094
14095 using CombineToTry = std::function<std::optional<CombineResult>(
14096 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14097 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14098 const RISCVSubtarget &)>;
14099
14100 /// Check if this node needs to be fully folded or extended for all users.
14101 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14102
14103 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14104 const RISCVSubtarget &Subtarget) {
14105 unsigned Opc = OrigOperand.getOpcode();
14106 MVT VT = OrigOperand.getSimpleValueType();
14107
14108 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14109 "Unexpected Opcode");
14110
14111 // The passthru must be undef for tail agnostic.
14112 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14113 return;
14114
14115 // Get the scalar value.
14116 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14117 : OrigOperand.getOperand(1);
14118
14119 // See if we have enough sign bits or zero bits in the scalar to use a
14120 // widening opcode by splatting to a smaller element size.
14121 unsigned EltBits = VT.getScalarSizeInBits();
14122 unsigned ScalarBits = Op.getValueSizeInBits();
14123 // If we're not getting all bits from the element, we need special handling.
14124 if (ScalarBits < EltBits) {
14125 // This should only occur on RV32.
14126 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
14127 !Subtarget.is64Bit() && "Unexpected splat");
14128 // vmv.v.x sign extends narrow inputs.
14129 SupportsSExt = true;
14130
14131 // If the input is positive, then sign extend is also zero extend.
14132 if (DAG.SignBitIsZero(Op))
14133 SupportsZExt = true;
14134
14135 EnforceOneUse = false;
14136 return;
14137 }
14138
14139 unsigned NarrowSize = EltBits / 2;
14140 // If the narrow type cannot be expressed with a legal VMV,
14141 // this is not a valid candidate.
14142 if (NarrowSize < 8)
14143 return;
14144
14145 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14146 SupportsSExt = true;
14147
14148 if (DAG.MaskedValueIsZero(Op,
14149 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14150 SupportsZExt = true;
14151
14152 EnforceOneUse = false;
14153 }
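
// --- Editorial aside (not part of the original file): the scalar test behind
// the splat case above, written out for SEW = 32 and NarrowSize = 16 in plain
// C++. A splat of V can be emitted at half the element width and then widened
// if V survives the narrow round trip. The helper names are invented.
#include <cassert>
#include <cstdint>

static bool splatFitsSExt16(int32_t V) { return V == (int32_t)(int16_t)V; }
static bool splatFitsZExt16(int32_t V) { return (uint32_t)V <= 0xFFFFu; }

int main() {
  assert(splatFitsSExt16(-3) && !splatFitsZExt16(-3));     // sign-widening only
  assert(splatFitsSExt16(1000) && splatFitsZExt16(1000));  // either widening works
  assert(!splatFitsSExt16(0x12345) && !splatFitsZExt16(0x12345)); // needs full SEW
}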
14154
14155 /// Helper method to set the various fields of this struct based on the
14156 /// type of \p Root.
14157 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14158 const RISCVSubtarget &Subtarget) {
14159 SupportsZExt = false;
14160 SupportsSExt = false;
14161 SupportsFPExt = false;
14162 EnforceOneUse = true;
14163 unsigned Opc = OrigOperand.getOpcode();
14164 // For the nodes we handle below, we end up using their inputs directly: see
14165 // getSource(). However since they either don't have a passthru or we check
14166 // that their passthru is undef, we can safely ignore their mask and VL.
14167 switch (Opc) {
14168 case ISD::ZERO_EXTEND:
14169 case ISD::SIGN_EXTEND: {
14170 MVT VT = OrigOperand.getSimpleValueType();
14171 if (!VT.isVector())
14172 break;
14173
14174 SDValue NarrowElt = OrigOperand.getOperand(0);
14175 MVT NarrowVT = NarrowElt.getSimpleValueType();
14176 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14177 if (NarrowVT.getVectorElementType() == MVT::i1)
14178 break;
14179
14180 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14181 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14182 break;
14183 }
14184 case RISCVISD::VZEXT_VL:
14185 SupportsZExt = true;
14186 break;
14187 case RISCVISD::VSEXT_VL:
14188 SupportsSExt = true;
14189 break;
14190 case RISCVISD::FP_EXTEND_VL:
14191 SupportsFPExt = true;
14192 break;
14193 case ISD::SPLAT_VECTOR:
14194 case RISCVISD::VMV_V_X_VL:
14195 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14196 break;
14197 case RISCVISD::VFMV_V_F_VL: {
14198 MVT VT = OrigOperand.getSimpleValueType();
14199
14200 if (!OrigOperand.getOperand(0).isUndef())
14201 break;
14202
14203 SDValue Op = OrigOperand.getOperand(1);
14204 if (Op.getOpcode() != ISD::FP_EXTEND)
14205 break;
14206
14207 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14208 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
14209 if (NarrowSize != ScalarBits)
14210 break;
14211
14212 SupportsFPExt = true;
14213 break;
14214 }
14215 default:
14216 break;
14217 }
14218 }
14219
14220 /// Check if \p Root supports any extension folding combines.
14221 static bool isSupportedRoot(const SDNode *Root,
14222 const RISCVSubtarget &Subtarget) {
14223 switch (Root->getOpcode()) {
14224 case ISD::ADD:
14225 case ISD::SUB:
14226 case ISD::MUL: {
14227 return Root->getValueType(0).isScalableVector();
14228 }
14229 case ISD::OR: {
14230 return Root->getValueType(0).isScalableVector() &&
14231 Root->getFlags().hasDisjoint();
14232 }
14233 // Vector Widening Integer Add/Sub/Mul Instructions
14234 case RISCVISD::ADD_VL:
14235 case RISCVISD::MUL_VL:
14236 case RISCVISD::VWADD_W_VL:
14237 case RISCVISD::VWADDU_W_VL:
14238 case RISCVISD::SUB_VL:
14239 case RISCVISD::VWSUB_W_VL:
14240 case RISCVISD::VWSUBU_W_VL:
14241 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14242 case RISCVISD::FADD_VL:
14243 case RISCVISD::FSUB_VL:
14244 case RISCVISD::FMUL_VL:
14245 case RISCVISD::VFWADD_W_VL:
14246 case RISCVISD::VFWSUB_W_VL:
14247 return true;
14248 case ISD::SHL:
14249 return Root->getValueType(0).isScalableVector() &&
14250 Subtarget.hasStdExtZvbb();
14251 case RISCVISD::SHL_VL:
14252 return Subtarget.hasStdExtZvbb();
14253 case RISCVISD::VFMADD_VL:
14254 case RISCVISD::VFNMSUB_VL:
14255 case RISCVISD::VFNMADD_VL:
14256 case RISCVISD::VFMSUB_VL:
14257 return true;
14258 default:
14259 return false;
14260 }
14261 }
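
// --- Editorial aside (not part of the original file): why an ISD::OR root is
// only accepted when it carries the disjoint flag. With no common set bits
// there is no carry, so the OR is equivalent to an ADD and can reuse the
// vwadd(u) combines. A plain-C++ check of that identity:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xF0F0u, B = 0x0A0Au;
  assert((A & B) == 0);       // "disjoint" operands
  assert((A | B) == A + B);   // or == add when no bit overlaps
}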
14262
14263 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14264 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14265 const RISCVSubtarget &Subtarget) {
14266 assert(isSupportedRoot(Root, Subtarget) &&
14267 "Trying to build a helper with an "
14268 "unsupported root");
14269 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14271 OrigOperand = Root->getOperand(OperandIdx);
14272
14273 unsigned Opc = Root->getOpcode();
14274 switch (Opc) {
14275 // We consider
14276 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14277 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14278 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14279 case RISCVISD::VWADD_W_VL:
14280 case RISCVISD::VWADDU_W_VL:
14281 case RISCVISD::VWSUB_W_VL:
14282 case RISCVISD::VWSUBU_W_VL:
14283 case RISCVISD::VFWADD_W_VL:
14284 case RISCVISD::VFWSUB_W_VL:
14285 if (OperandIdx == 1) {
14286 SupportsZExt =
14287 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14288 SupportsSExt =
14289 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14290 SupportsFPExt =
14291 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14292 // There's no existing extension here, so we don't have to worry about
14293 // making sure it gets removed.
14294 EnforceOneUse = false;
14295 break;
14296 }
14297 [[fallthrough]];
14298 default:
14299 fillUpExtensionSupport(Root, DAG, Subtarget);
14300 break;
14301 }
14302 }
14303
14304 /// Helper function to get the Mask and VL from \p Root.
14305 static std::pair<SDValue, SDValue>
14306 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14307 const RISCVSubtarget &Subtarget) {
14308 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14309 switch (Root->getOpcode()) {
14310 case ISD::ADD:
14311 case ISD::SUB:
14312 case ISD::MUL:
14313 case ISD::OR:
14314 case ISD::SHL: {
14315 SDLoc DL(Root);
14316 MVT VT = Root->getSimpleValueType(0);
14317 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14318 }
14319 default:
14320 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14321 }
14322 }
14323
14324 /// Helper function to check if \p N is commutative with respect to the
14325 /// foldings that are supported by this class.
14326 static bool isCommutative(const SDNode *N) {
14327 switch (N->getOpcode()) {
14328 case ISD::ADD:
14329 case ISD::MUL:
14330 case ISD::OR:
14331 case RISCVISD::ADD_VL:
14332 case RISCVISD::MUL_VL:
14333 case RISCVISD::VWADD_W_VL:
14334 case RISCVISD::VWADDU_W_VL:
14335 case RISCVISD::FADD_VL:
14336 case RISCVISD::FMUL_VL:
14337 case RISCVISD::VFWADD_W_VL:
14338 case RISCVISD::VFMADD_VL:
14339 case RISCVISD::VFNMSUB_VL:
14340 case RISCVISD::VFNMADD_VL:
14341 case RISCVISD::VFMSUB_VL:
14342 return true;
14343 case ISD::SUB:
14344 case RISCVISD::SUB_VL:
14345 case RISCVISD::VWSUB_W_VL:
14346 case RISCVISD::VWSUBU_W_VL:
14347 case RISCVISD::FSUB_VL:
14348 case RISCVISD::VFWSUB_W_VL:
14349 case ISD::SHL:
14350 case RISCVISD::SHL_VL:
14351 return false;
14352 default:
14353 llvm_unreachable("Unexpected opcode");
14354 }
14355 }
14356
14357 /// Get a list of combines to try for folding extensions in \p Root.
14358 /// Note that each returned CombineToTry function doesn't actually modify
14359 /// anything. Instead they produce an optional CombineResult that, if not
14360 /// None, needs to be materialized for the combine to be applied.
14361 /// \see CombineResult::materialize.
14362 /// If the related CombineToTry function returns std::nullopt, that means the
14363 /// combine didn't match.
14364 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14365};
14366
14367/// Helper structure that holds all the necessary information to materialize a
14368/// combine that does some extension folding.
14369struct CombineResult {
14370 /// Opcode to be generated when materializing the combine.
14371 unsigned TargetOpcode;
14372 // No value means no extension is needed.
14373 std::optional<ExtKind> LHSExt;
14374 std::optional<ExtKind> RHSExt;
14375 /// Root of the combine.
14376 SDNode *Root;
14377 /// LHS of the TargetOpcode.
14378 NodeExtensionHelper LHS;
14379 /// RHS of the TargetOpcode.
14380 NodeExtensionHelper RHS;
14381
14382 CombineResult(unsigned TargetOpcode, SDNode *Root,
14383 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14384 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14385 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14386 LHS(LHS), RHS(RHS) {}
14387
14388 /// Return a value that uses TargetOpcode and that can be used to replace
14389 /// Root.
14390 /// The actual replacement is *not* done in that method.
14391 SDValue materialize(SelectionDAG &DAG,
14392 const RISCVSubtarget &Subtarget) const {
14393 SDValue Mask, VL, Passthru;
14394 std::tie(Mask, VL) =
14395 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14396 switch (Root->getOpcode()) {
14397 default:
14398 Passthru = Root->getOperand(2);
14399 break;
14400 case ISD::ADD:
14401 case ISD::SUB:
14402 case ISD::MUL:
14403 case ISD::OR:
14404 case ISD::SHL:
14405 Passthru = DAG.getUNDEF(Root->getValueType(0));
14406 break;
14407 }
14408 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14409 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14410 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14411 Passthru, Mask, VL);
14412 }
14413};
14414
14415/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14416/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14417/// are zext) and LHS and RHS can be folded into Root.
14418 /// AllowExtMask defines which form `ext` can take in this pattern.
14419///
14420/// \note If the pattern can match with both zext and sext, the returned
14421/// CombineResult will feature the zext result.
14422///
14423/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14424/// can be used to apply the pattern.
14425static std::optional<CombineResult>
14426canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14427 const NodeExtensionHelper &RHS,
14428 uint8_t AllowExtMask, SelectionDAG &DAG,
14429 const RISCVSubtarget &Subtarget) {
14430 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14431 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14432 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14433 /*RHSExt=*/{ExtKind::ZExt});
14434 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14435 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14436 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14437 /*RHSExt=*/{ExtKind::SExt});
14438 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14439 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14440 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14441 /*RHSExt=*/{ExtKind::FPExt});
14442 return std::nullopt;
14443}
14444
14445/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14446/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14447/// are zext) and LHS and RHS can be folded into Root.
14448///
14449/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14450/// can be used to apply the pattern.
14451static std::optional<CombineResult>
14452canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14453 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14454 const RISCVSubtarget &Subtarget) {
14455 return canFoldToVWWithSameExtensionImpl(
14456 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14457 Subtarget);
14458}
14459
14460/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14461///
14462/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14463/// can be used to apply the pattern.
14464static std::optional<CombineResult>
14465canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14466 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14467 const RISCVSubtarget &Subtarget) {
14468 if (RHS.SupportsFPExt)
14469 return CombineResult(
14470 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14471 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14472
14473 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14474 // sext/zext?
14475 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14476 // purposes.
14477 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14478 return CombineResult(
14479 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14480 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14481 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14482 return CombineResult(
14483 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14484 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14485 return std::nullopt;
14486}
14487
14488/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14489///
14490/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14491/// can be used to apply the pattern.
14492static std::optional<CombineResult>
14493canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14494 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14495 const RISCVSubtarget &Subtarget) {
14496 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14497 Subtarget);
14498}
14499
14500/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14501///
14502/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14503/// can be used to apply the pattern.
14504static std::optional<CombineResult>
14505canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14506 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14507 const RISCVSubtarget &Subtarget) {
14508 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14509 Subtarget);
14510}
14511
14512/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14513///
14514/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14515/// can be used to apply the pattern.
14516static std::optional<CombineResult>
14517canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14518 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14519 const RISCVSubtarget &Subtarget) {
14520 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14521 Subtarget);
14522}
14523
14524/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14525///
14526/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14527/// can be used to apply the pattern.
14528static std::optional<CombineResult>
14529canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14530 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14531 const RISCVSubtarget &Subtarget) {
14532
14533 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14534 return std::nullopt;
14535 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14536 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14537 /*RHSExt=*/{ExtKind::ZExt});
14538}
14539
14540 SmallVector<NodeExtensionHelper::CombineToTry>
14541 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14542 SmallVector<CombineToTry> Strategies;
14543 switch (Root->getOpcode()) {
14544 case ISD::ADD:
14545 case ISD::SUB:
14546 case ISD::OR:
14547 case RISCVISD::ADD_VL:
14548 case RISCVISD::SUB_VL:
14549 case RISCVISD::FADD_VL:
14550 case RISCVISD::FSUB_VL:
14551 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14552 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14553 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14554 Strategies.push_back(canFoldToVW_W);
14555 break;
14556 case RISCVISD::FMUL_VL:
14557 case RISCVISD::VFMADD_VL:
14558 case RISCVISD::VFMSUB_VL:
14559 case RISCVISD::VFNMADD_VL:
14560 case RISCVISD::VFNMSUB_VL:
14561 Strategies.push_back(canFoldToVWWithSameExtension);
14562 break;
14563 case ISD::MUL:
14564 case RISCVISD::MUL_VL:
14565 // mul -> vwmul(u)
14566 Strategies.push_back(canFoldToVWWithSameExtension);
14567 // mul -> vwmulsu
14568 Strategies.push_back(canFoldToVW_SU);
14569 break;
14570 case ISD::SHL:
14571 case RISCVISD::SHL_VL:
14572 // shl -> vwsll
14573 Strategies.push_back(canFoldToVWWithZEXT);
14574 break;
14575 case RISCVISD::VWADD_W_VL:
14576 case RISCVISD::VWSUB_W_VL:
14577 // vwadd_w|vwsub_w -> vwadd|vwsub
14578 Strategies.push_back(canFoldToVWWithSEXT);
14579 break;
14580 case RISCVISD::VWADDU_W_VL:
14581 case RISCVISD::VWSUBU_W_VL:
14582 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14583 Strategies.push_back(canFoldToVWWithZEXT);
14584 break;
14585 case RISCVISD::VFWADD_W_VL:
14586 case RISCVISD::VFWSUB_W_VL:
14587 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14588 Strategies.push_back(canFoldToVWWithFPEXT);
14589 break;
14590 default:
14591 llvm_unreachable("Unexpected opcode");
14592 }
14593 return Strategies;
14594}
14595} // End anonymous namespace.
14596
14597/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
14598/// The supported combines are:
14599/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14600/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14601/// mul | mul_vl -> vwmul(u) | vwmul_su
14602/// shl | shl_vl -> vwsll
14603/// fadd_vl -> vfwadd | vfwadd_w
14604/// fsub_vl -> vfwsub | vfwsub_w
14605/// fmul_vl -> vfwmul
14606/// vwadd_w(u) -> vwadd(u)
14607/// vwsub_w(u) -> vwsub(u)
14608/// vfwadd_w -> vfwadd
14609/// vfwsub_w -> vfwsub
14610 static SDValue combineOp_VLToVWOp_VL(SDNode *N,
14611 TargetLowering::DAGCombinerInfo &DCI,
14612 const RISCVSubtarget &Subtarget) {
14613 SelectionDAG &DAG = DCI.DAG;
14614 if (DCI.isBeforeLegalize())
14615 return SDValue();
14616
14617 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14618 return SDValue();
14619
14620 SmallVector<SDNode *> Worklist;
14621 SmallSet<SDNode *, 8> Inserted;
14622 Worklist.push_back(N);
14623 Inserted.insert(N);
14624 SmallVector<CombineResult> CombinesToApply;
14625
14626 while (!Worklist.empty()) {
14627 SDNode *Root = Worklist.pop_back_val();
14628
14629 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
14630 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
14631 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
14632 &Inserted](const NodeExtensionHelper &Op) {
14633 if (Op.needToPromoteOtherUsers()) {
14634 for (SDNode::use_iterator UI = Op.OrigOperand->use_begin(),
14635 UE = Op.OrigOperand->use_end();
14636 UI != UE; ++UI) {
14637 SDNode *TheUse = *UI;
14638 if (!NodeExtensionHelper::isSupportedRoot(TheUse, Subtarget))
14639 return false;
14640 // We only support the first 2 operands of FMA.
14641 if (UI.getOperandNo() >= 2)
14642 return false;
14643 if (Inserted.insert(TheUse).second)
14644 Worklist.push_back(TheUse);
14645 }
14646 }
14647 return true;
14648 };
14649
14650 // Control the compile time by limiting the number of nodes we look at in
14651 // total.
14652 if (Inserted.size() > ExtensionMaxWebSize)
14653 return SDValue();
14654
14655 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14656 NodeExtensionHelper::getSupportedFoldings(Root);
14657
14658 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14659 bool Matched = false;
14660 for (int Attempt = 0;
14661 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
14662 ++Attempt) {
14663
14664 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14665 FoldingStrategies) {
14666 std::optional<CombineResult> Res =
14667 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
14668 if (Res) {
14669 Matched = true;
14670 CombinesToApply.push_back(*Res);
14671 // All the inputs that are extended need to be folded, otherwise
14672 // we would be leaving the old input (since it may still be used),
14673 // and the new one.
14674 if (Res->LHSExt.has_value())
14675 if (!AppendUsersIfNeeded(LHS))
14676 return SDValue();
14677 if (Res->RHSExt.has_value())
14678 if (!AppendUsersIfNeeded(RHS))
14679 return SDValue();
14680 break;
14681 }
14682 }
14683 std::swap(LHS, RHS);
14684 }
14685 // Right now we do an all or nothing approach.
14686 if (!Matched)
14687 return SDValue();
14688 }
14689 // Store the value for the replacement of the input node separately.
14690 SDValue InputRootReplacement;
14691 // We do the RAUW after we materialize all the combines, because some replaced
14692 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14693 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14694 // yet-to-be-visited CombinesToApply roots.
14695 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14696 ValuesToReplace.reserve(CombinesToApply.size());
14697 for (CombineResult Res : CombinesToApply) {
14698 SDValue NewValue = Res.materialize(DAG, Subtarget);
14699 if (!InputRootReplacement) {
14700 assert(Res.Root == N &&
14701 "First element is expected to be the current node");
14702 InputRootReplacement = NewValue;
14703 } else {
14704 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14705 }
14706 }
14707 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14708 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14709 DCI.AddToWorklist(OldNewValues.second.getNode());
14710 }
14711 return InputRootReplacement;
14712}
14713
14714// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14715// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14716// y will be the Passthru and cond will be the Mask.
14717 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14718 unsigned Opc = N->getOpcode();
14719 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14720 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14721
14722 SDValue Y = N->getOperand(0);
14723 SDValue MergeOp = N->getOperand(1);
14724 unsigned MergeOpc = MergeOp.getOpcode();
14725
14726 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14727 return SDValue();
14728
14729 SDValue X = MergeOp->getOperand(1);
14730
14731 if (!MergeOp.hasOneUse())
14732 return SDValue();
14733
14734 // Passthru should be undef
14735 SDValue Passthru = N->getOperand(2);
14736 if (!Passthru.isUndef())
14737 return SDValue();
14738
14739 // Mask should be all ones
14740 SDValue Mask = N->getOperand(3);
14741 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14742 return SDValue();
14743
14744 // False value of MergeOp should be all zeros
14745 SDValue Z = MergeOp->getOperand(2);
14746
14747 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14748 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14749 Z = Z.getOperand(1);
14750
14751 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14752 return SDValue();
14753
14754 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14755 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14756 N->getFlags());
14757}
14758
14759 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14760 TargetLowering::DAGCombinerInfo &DCI,
14761 const RISCVSubtarget &Subtarget) {
14762 [[maybe_unused]] unsigned Opc = N->getOpcode();
14763 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14764 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14765
14766 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
14767 return V;
14768
14769 return combineVWADDSUBWSelect(N, DCI.DAG);
14770}
14771
14772// Helper function for performMemPairCombine.
14773// Try to combine the memory loads/stores LSNode1 and LSNode2
14774// into a single memory pair operation.
14775 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14776 LSBaseSDNode *LSNode2, SDValue BasePtr,
14777 uint64_t Imm) {
14778 SmallPtrSet<const SDNode *, 32> Visited;
14779 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14780
14781 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14782 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14783 return SDValue();
14784
14785 MachineFunction &MF = DAG.getMachineFunction();
14786 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14787
14788 // The new operation has twice the width.
14789 MVT XLenVT = Subtarget.getXLenVT();
14790 EVT MemVT = LSNode1->getMemoryVT();
14791 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14792 MachineMemOperand *MMO = LSNode1->getMemOperand();
14793 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14794 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14795
14796 if (LSNode1->getOpcode() == ISD::LOAD) {
14797 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14798 unsigned Opcode;
14799 if (MemVT == MVT::i32)
14800 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14801 else
14802 Opcode = RISCVISD::TH_LDD;
14803
14804 SDValue Res = DAG.getMemIntrinsicNode(
14805 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14806 {LSNode1->getChain(), BasePtr,
14807 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14808 NewMemVT, NewMMO);
14809
14810 SDValue Node1 =
14811 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14812 SDValue Node2 =
14813 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14814
14815 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14816 return Node1;
14817 } else {
14818 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14819
14820 SDValue Res = DAG.getMemIntrinsicNode(
14821 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14822 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14823 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14824 NewMemVT, NewMMO);
14825
14826 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14827 return Res;
14828 }
14829}
14830
14831// Try to combine two adjacent loads/stores to a single pair instruction from
14832// the XTHeadMemPair vendor extension.
14833 static SDValue performMemPairCombine(SDNode *N,
14834 TargetLowering::DAGCombinerInfo &DCI) {
14835 SelectionDAG &DAG = DCI.DAG;
14836 MachineFunction &MF = DAG.getMachineFunction();
14837 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14838
14839 // Target does not support load/store pair.
14840 if (!Subtarget.hasVendorXTHeadMemPair())
14841 return SDValue();
14842
14843 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14844 EVT MemVT = LSNode1->getMemoryVT();
14845 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14846
14847 // No volatile, indexed or atomic loads/stores.
14848 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14849 return SDValue();
14850
14851 // Function to get a base + constant representation from a memory value.
14852 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14853 if (Ptr->getOpcode() == ISD::ADD)
14854 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14855 return {Ptr->getOperand(0), C1->getZExtValue()};
14856 return {Ptr, 0};
14857 };
14858
14859 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14860
14861 SDValue Chain = N->getOperand(0);
14862 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14863 UI != UE; ++UI) {
14864 SDUse &Use = UI.getUse();
14865 if (Use.getUser() != N && Use.getResNo() == 0 &&
14866 Use.getUser()->getOpcode() == N->getOpcode()) {
14867 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14868
14869 // No volatile, indexed or atomic loads/stores.
14870 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14871 continue;
14872
14873 // Check if LSNode1 and LSNode2 have the same type and extension.
14874 if (LSNode1->getOpcode() == ISD::LOAD)
14875 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14876 cast<LoadSDNode>(LSNode1)->getExtensionType())
14877 continue;
14878
14879 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14880 continue;
14881
14882 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14883
14884 // Check if the base pointer is the same for both instructions.
14885 if (Base1 != Base2)
14886 continue;
14887
14888 // Check if the offsets match the XTHeadMemPair encoding constraints.
14889 bool Valid = false;
14890 if (MemVT == MVT::i32) {
14891 // Check for adjacent i32 values and a 2-bit index.
14892 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14893 Valid = true;
14894 } else if (MemVT == MVT::i64) {
14895 // Check for adjacent i64 values and a 2-bit index.
14896 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14897 Valid = true;
14898 }
14899
14900 if (!Valid)
14901 continue;
14902
14903 // Try to combine.
14904 if (SDValue Res =
14905 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14906 return Res;
14907 }
14908 }
14909
14910 return SDValue();
14911}
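
// --- Editorial aside (not part of the original file): the offset constraint
// checked above, pulled into a small helper. The th.lwd/th.swd forms encode a
// 2-bit index scaled by 8, so the first offset of an i32 pair must be one of
// {0, 8, 16, 24}; the i64 forms scale by 16, giving {0, 16, 32, 48}. The
// wrapper name is invented; llvm::isShiftedUInt is the helper used above.
#include "llvm/Support/MathExtras.h"
#include <cstdint>

static bool isValidMemPairOffsets(uint64_t Off1, uint64_t Off2, bool IsI64) {
  if (IsI64)
    return Off1 + 8 == Off2 && llvm::isShiftedUInt<2, 4>(Off1);
  return Off1 + 4 == Off2 && llvm::isShiftedUInt<2, 3>(Off1);
}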
14912
14913// Fold
14914// (fp_to_int (froundeven X)) -> fcvt X, rne
14915// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14916// (fp_to_int (ffloor X)) -> fcvt X, rdn
14917// (fp_to_int (fceil X)) -> fcvt X, rup
14918// (fp_to_int (fround X)) -> fcvt X, rmm
14919// (fp_to_int (frint X)) -> fcvt X
14920 static SDValue performFP_TO_INTCombine(SDNode *N,
14921 TargetLowering::DAGCombinerInfo &DCI,
14922 const RISCVSubtarget &Subtarget) {
14923 SelectionDAG &DAG = DCI.DAG;
14924 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14925 MVT XLenVT = Subtarget.getXLenVT();
14926
14927 SDValue Src = N->getOperand(0);
14928
14929 // Don't do this for strict-fp Src.
14930 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14931 return SDValue();
14932
14933 // Ensure the FP type is legal.
14934 if (!TLI.isTypeLegal(Src.getValueType()))
14935 return SDValue();
14936
14937 // Don't do this for f16 with Zfhmin and not Zfh.
14938 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14939 return SDValue();
14940
14941 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14942 // If the result is invalid, we didn't find a foldable instruction.
14943 if (FRM == RISCVFPRndMode::Invalid)
14944 return SDValue();
14945
14946 SDLoc DL(N);
14947 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14948 EVT VT = N->getValueType(0);
14949
14950 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14951 MVT SrcVT = Src.getSimpleValueType();
14952 MVT SrcContainerVT = SrcVT;
14953 MVT ContainerVT = VT.getSimpleVT();
14954 SDValue XVal = Src.getOperand(0);
14955
14956 // For widening and narrowing conversions we just combine it into a
14957 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14958 // end up getting lowered to their appropriate pseudo instructions based on
14959 // their operand types
14960 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14961 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14962 return SDValue();
14963
14964 // Make fixed-length vectors scalable first
14965 if (SrcVT.isFixedLengthVector()) {
14966 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14967 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14968 ContainerVT =
14969 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14970 }
14971
14972 auto [Mask, VL] =
14973 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14974
14975 SDValue FpToInt;
14976 if (FRM == RISCVFPRndMode::RTZ) {
14977 // Use the dedicated trunc static rounding mode if we're truncating so we
14978 // don't need to generate calls to fsrmi/fsrm
14979 unsigned Opc =
14980 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14981 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14982 } else if (FRM == RISCVFPRndMode::DYN) {
14983 unsigned Opc =
14984 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14985 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14986 } else {
14987 unsigned Opc =
14988 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14989 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14990 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14991 }
14992
14993 // If converted from fixed-length to scalable, convert back
14994 if (VT.isFixedLengthVector())
14995 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14996
14997 return FpToInt;
14998 }
14999
15000 // Only handle XLen or i32 types. Other types narrower than XLen will
15001 // eventually be legalized to XLenVT.
15002 if (VT != MVT::i32 && VT != XLenVT)
15003 return SDValue();
15004
15005 unsigned Opc;
15006 if (VT == XLenVT)
15007 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15008 else
15009 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15010
15011 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
15012 DAG.getTargetConstant(FRM, DL, XLenVT));
15013 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
15014}
15015
15016// Fold
15017// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
15018// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
15019// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
15020// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
15021// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
15022// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
15023 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
15024 TargetLowering::DAGCombinerInfo &DCI,
15025 const RISCVSubtarget &Subtarget) {
15026 SelectionDAG &DAG = DCI.DAG;
15027 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15028 MVT XLenVT = Subtarget.getXLenVT();
15029
15030 // Only handle XLen types. Other types narrower than XLen will eventually be
15031 // legalized to XLenVT.
15032 EVT DstVT = N->getValueType(0);
15033 if (DstVT != XLenVT)
15034 return SDValue();
15035
15036 SDValue Src = N->getOperand(0);
15037
15038 // Don't do this for strict-fp Src.
15039 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15040 return SDValue();
15041
15042 // Ensure the FP type is also legal.
15043 if (!TLI.isTypeLegal(Src.getValueType()))
15044 return SDValue();
15045
15046 // Don't do this for f16 with Zfhmin and not Zfh.
15047 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15048 return SDValue();
15049
15050 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15051
15052 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15053 if (FRM == RISCVFPRndMode::Invalid)
15054 return SDValue();
15055
15056 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
15057
15058 unsigned Opc;
15059 if (SatVT == DstVT)
15060 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15061 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
15062 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15063 else
15064 return SDValue();
15065 // FIXME: Support other SatVTs by clamping before or after the conversion.
15066
15067 Src = Src.getOperand(0);
15068
15069 SDLoc DL(N);
15070 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
15071 DAG.getTargetConstant(FRM, DL, XLenVT));
15072
15073 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
15074 // extend.
15075 if (Opc == RISCVISD::FCVT_WU_RV64)
15076 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
15077
15078 // RISC-V FP-to-int conversions saturate to the destination register size, but
15079 // don't produce 0 for nan.
15080 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
15081 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
15082}
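
// --- Editorial aside (not part of the original file): a rough scalar model of
// the select built above. fcvt saturates out-of-range inputs but returns the
// largest value for NaN, while FP_TO_SINT_SAT requires 0 for NaN, hence the
// "X unordered with X" select. Function names are invented for illustration.
#include <cmath>
#include <cstdint>
#include <limits>

static int32_t modelFcvtWD(double X) { // loose model of fcvt.w.d with rtz
  if (std::isnan(X) || X >= 2147483647.0)
    return std::numeric_limits<int32_t>::max();
  if (X <= -2147483648.0)
    return std::numeric_limits<int32_t>::min();
  return (int32_t)X;
}

static int32_t modelFpToSInt32Sat(double X) {
  return (X != X) ? 0 : modelFcvtWD(X); // the SETUO select emitted above
}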
15083
15084// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
15085// smaller than XLenVT.
15086 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
15087 const RISCVSubtarget &Subtarget) {
15088 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
15089
15090 SDValue Src = N->getOperand(0);
15091 if (Src.getOpcode() != ISD::BSWAP)
15092 return SDValue();
15093
15094 EVT VT = N->getValueType(0);
15095 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
15096 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
15097 return SDValue();
15098
15099 SDLoc DL(N);
15100 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
15101}
15102
15103// Convert from one FMA opcode to another based on whether we are negating the
15104// multiply result and/or the accumulator.
15105// NOTE: Only supports RVV operations with VL.
15106static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
15107 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
15108 if (NegMul) {
15109 // clang-format off
15110 switch (Opcode) {
15111 default: llvm_unreachable("Unexpected opcode");
15112 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15113 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15114 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15115 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15116 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15117 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15118 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15119 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15120 }
15121 // clang-format on
15122 }
15123
15124 // Negating the accumulator changes ADD<->SUB.
15125 if (NegAcc) {
15126 // clang-format off
15127 switch (Opcode) {
15128 default: llvm_unreachable("Unexpected opcode");
15129 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15130 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15131 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15132 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15133 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15134 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15135 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15136 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15137 }
15138 // clang-format on
15139 }
15140
15141 return Opcode;
15142}
15143
15144 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15145 // Fold FNEG_VL into FMA opcodes.
15146 // The first operand of strict-fp is chain.
15147 unsigned Offset = N->isTargetStrictFPOpcode();
15148 SDValue A = N->getOperand(0 + Offset);
15149 SDValue B = N->getOperand(1 + Offset);
15150 SDValue C = N->getOperand(2 + Offset);
15151 SDValue Mask = N->getOperand(3 + Offset);
15152 SDValue VL = N->getOperand(4 + Offset);
15153
15154 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15155 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15156 V.getOperand(2) == VL) {
15157 // Return the negated input.
15158 V = V.getOperand(0);
15159 return true;
15160 }
15161
15162 return false;
15163 };
15164
15165 bool NegA = invertIfNegative(A);
15166 bool NegB = invertIfNegative(B);
15167 bool NegC = invertIfNegative(C);
15168
15169 // If no operands are negated, we're done.
15170 if (!NegA && !NegB && !NegC)
15171 return SDValue();
15172
15173 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15174 if (N->isTargetStrictFPOpcode())
15175 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15176 {N->getOperand(0), A, B, C, Mask, VL});
15177 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15178 VL);
15179}
15180
15181 static SDValue performVFMADD_VLCombine(SDNode *N,
15182 TargetLowering::DAGCombinerInfo &DCI,
15183 const RISCVSubtarget &Subtarget) {
15184 SelectionDAG &DAG = DCI.DAG;
15185
15186 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15187 return V;
15188
15189 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
15190 !Subtarget.hasVInstructionsF16())
15191 return SDValue();
15192
15193 // FIXME: Ignore strict opcodes for now.
15194 if (N->isTargetStrictFPOpcode())
15195 return SDValue();
15196
15197 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
15198}
15199
15200 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15201 const RISCVSubtarget &Subtarget) {
15202 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15203
15204 EVT VT = N->getValueType(0);
15205
15206 if (VT != Subtarget.getXLenVT())
15207 return SDValue();
15208
15209 if (!isa<ConstantSDNode>(N->getOperand(1)))
15210 return SDValue();
15211 uint64_t ShAmt = N->getConstantOperandVal(1);
15212
15213 SDValue N0 = N->getOperand(0);
15214
15215 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
15216 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
15217 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
15218 unsigned ExtSize =
15219 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
15220 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
15221 N0.getOperand(0).hasOneUse() &&
15222 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15223 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15224 if (LShAmt < ExtSize) {
15225 unsigned Size = VT.getSizeInBits();
15226 SDLoc ShlDL(N0.getOperand(0));
15227 SDValue Shl =
15228 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
15229 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
15230 SDLoc DL(N);
15231 return DAG.getNode(ISD::SRA, DL, VT, Shl,
15232 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
15233 }
15234 }
15235 }
15236
15237 if (ShAmt > 32 || VT != MVT::i64)
15238 return SDValue();
15239
15240 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15241 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15242 //
15243 // Also try these folds where an add or sub is in the middle.
15244 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
15245 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
15246 SDValue Shl;
15247 ConstantSDNode *AddC = nullptr;
15248
15249 // We might have an ADD or SUB between the SRA and SHL.
15250 bool IsAdd = N0.getOpcode() == ISD::ADD;
15251 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15252 // Other operand needs to be a constant we can modify.
15253 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15254 if (!AddC)
15255 return SDValue();
15256
15257 // AddC needs to have at least 32 trailing zeros.
15258 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
15259 return SDValue();
15260
15261 // All users should be a shift by constant less than or equal to 32. This
15262 // ensures we'll do this optimization for each of them to produce an
15263 // add/sub+sext_inreg they can all share.
15264 for (SDNode *U : N0->uses()) {
15265 if (U->getOpcode() != ISD::SRA ||
15266 !isa<ConstantSDNode>(U->getOperand(1)) ||
15267 U->getConstantOperandVal(1) > 32)
15268 return SDValue();
15269 }
15270
15271 Shl = N0.getOperand(IsAdd ? 0 : 1);
15272 } else {
15273 // Not an ADD or SUB.
15274 Shl = N0;
15275 }
15276
15277 // Look for a shift left by 32.
15278 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15279 Shl.getConstantOperandVal(1) != 32)
15280 return SDValue();
15281
15282 // If we didn't look through an add/sub, then the shl should have one use.
15283 // If we did look through an add/sub, the sext_inreg we create is free so
15284 // we're only creating 2 new instructions. It's enough to only remove the
15285 // original sra+add/sub.
15286 if (!AddC && !Shl.hasOneUse())
15287 return SDValue();
15288
15289 SDLoc DL(N);
15290 SDValue In = Shl.getOperand(0);
15291
15292 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15293 // constant.
15294 if (AddC) {
15295 SDValue ShiftedAddC =
15296 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
15297 if (IsAdd)
15298 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15299 else
15300 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15301 }
15302
15303 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15304 DAG.getValueType(MVT::i32));
15305 if (ShAmt == 32)
15306 return SExt;
15307
15308 return DAG.getNode(
15309 ISD::SHL, DL, MVT::i64, SExt,
15310 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15311}
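
// --- Editorial aside (not part of the original file): the i64 identity behind
// the (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) fold above,
// checked in plain C++ (assuming the usual arithmetic right shift on int64_t).
#include <cassert>
#include <cstdint>

static int64_t sraOfShl(int64_t X, unsigned C) {
  int64_t Hi = (int64_t)((uint64_t)X << 32); // shl X, 32
  return Hi >> (32 - C);                     // sra by 32 - C
}
static int64_t shlOfSextInreg(int64_t X, unsigned C) {
  int64_t S = (int32_t)X;                    // sext_inreg X, i32
  return (int64_t)((uint64_t)S << C);        // shl by C
}

int main() {
  for (int64_t X : {int64_t(0x12345678), int64_t(-7), int64_t(0x7FFFFFFF)})
    for (unsigned C : {0u, 1u, 5u, 31u})
      assert(sraOfShl(X, C) == shlOfSextInreg(X, C));
}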
15312
15313// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
15314 // the result is used as the condition of a br_cc or select_cc we can invert,
15315// inverting the setcc is free, and Z is 0/1. Caller will invert the
15316// br_cc/select_cc.
15317 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15318 bool IsAnd = Cond.getOpcode() == ISD::AND;
15319 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15320 return SDValue();
15321
15322 if (!Cond.hasOneUse())
15323 return SDValue();
15324
15325 SDValue Setcc = Cond.getOperand(0);
15326 SDValue Xor = Cond.getOperand(1);
15327 // Canonicalize setcc to LHS.
15328 if (Setcc.getOpcode() != ISD::SETCC)
15329 std::swap(Setcc, Xor);
15330 // LHS should be a setcc and RHS should be an xor.
15331 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15332 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15333 return SDValue();
15334
15335 // If the condition is an And, SimplifyDemandedBits may have changed
15336 // (xor Z, 1) to (not Z).
15337 SDValue Xor1 = Xor.getOperand(1);
15338 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15339 return SDValue();
15340
15341 EVT VT = Cond.getValueType();
15342 SDValue Xor0 = Xor.getOperand(0);
15343
15344 // The LHS of the xor needs to be 0/1.
15345 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15346 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15347 return SDValue();
15348
15349 // We can only invert integer setccs.
15350 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15351 if (!SetCCOpVT.isScalarInteger())
15352 return SDValue();
15353
15354 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15355 if (ISD::isIntEqualitySetCC(CCVal)) {
15356 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15357 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15358 Setcc.getOperand(1), CCVal);
15359 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15360 // Invert (setlt 0, X) by converting to (setlt X, 1).
15361 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15362 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15363 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15364 // (setlt X, 1) by converting to (setlt 0, X).
15365 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15366 DAG.getConstant(0, SDLoc(Setcc), VT),
15367 Setcc.getOperand(0), CCVal);
15368 } else
15369 return SDValue();
15370
15371 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15372 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15373}
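
// --- Editorial aside (not part of the original file): the Boolean identity
// behind the rewrite above. The caller inverts the br_cc/select_cc, so the
// net effect is De Morgan: !(A && !Z) == (!A || Z) and !(A || !Z) == (!A && Z),
// which is what allows swapping and/or while inverting the setcc.
#include <cassert>

int main() {
  for (bool A : {false, true})
    for (bool Z : {false, true}) {
      assert(!(A && !Z) == (!A || Z));
      assert(!(A || !Z) == (!A && Z));
    }
}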
15374
15375 // Perform common combines for BR_CC and SELECT_CC conditions.
15376static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15377 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15378 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15379
15380 // Since an arithmetic right shift always preserves the sign bit, the
15381 // shift can be omitted.
15382 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15383 // setge (sra X, N), 0 -> setge X, 0
15384 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15385 LHS.getOpcode() == ISD::SRA) {
15386 LHS = LHS.getOperand(0);
15387 return true;
15388 }
15389
15390 if (!ISD::isIntEqualitySetCC(CCVal))
15391 return false;
15392
15393 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15394 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15395 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15396 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15397 // If we're looking for eq 0 instead of ne 0, we need to invert the
15398 // condition.
15399 bool Invert = CCVal == ISD::SETEQ;
15400 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15401 if (Invert)
15402 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15403
15404 RHS = LHS.getOperand(1);
15405 LHS = LHS.getOperand(0);
15406 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15407
15408 CC = DAG.getCondCode(CCVal);
15409 return true;
15410 }
15411
15412 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15413 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15414 RHS = LHS.getOperand(1);
15415 LHS = LHS.getOperand(0);
15416 return true;
15417 }
15418
15419 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15420 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15421 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15422 SDValue LHS0 = LHS.getOperand(0);
15423 if (LHS0.getOpcode() == ISD::AND &&
15424 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15425 uint64_t Mask = LHS0.getConstantOperandVal(1);
15426 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15427 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15428 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15429 CC = DAG.getCondCode(CCVal);
15430
15431 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15432 LHS = LHS0.getOperand(0);
15433 if (ShAmt != 0)
15434 LHS =
15435 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15436 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15437 return true;
15438 }
15439 }
15440 }
15441
15442 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15443 // This can occur when legalizing some floating point comparisons.
15444 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15445 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15446 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15447 CC = DAG.getCondCode(CCVal);
15448 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15449 return true;
15450 }
15451
15452 if (isNullConstant(RHS)) {
15453 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15454 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15455 CC = DAG.getCondCode(CCVal);
15456 LHS = NewCond;
15457 return true;
15458 }
15459 }
15460
15461 return false;
15462}
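
// --- Editorial aside (not part of the original file): the single-bit test
// rewrite used above, ((srl (and X, 1<<C), C) == 0) <=> ((shl X, XLen-1-C) >= 0),
// shown for XLen = 64 in plain C++.
#include <cassert>
#include <cstdint>

static bool bitClearViaSrl(uint64_t X, unsigned C) {
  return ((X >> C) & 1) == 0;            // srl + and form
}
static bool bitClearViaShl(uint64_t X, unsigned C) {
  return (int64_t)(X << (63 - C)) >= 0;  // bit C becomes the sign bit
}

int main() {
  for (uint64_t X : {0x0ULL, 0x1ULL, 0x80ULL, 0xDEADBEEFULL})
    for (unsigned C : {0u, 7u, 31u, 63u})
      assert(bitClearViaSrl(X, C) == bitClearViaShl(X, C));
}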
15463
15464// Fold
15465// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15466// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15467// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15468// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
15469 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15470 SDValue TrueVal, SDValue FalseVal,
15471 bool Swapped) {
15472 bool Commutative = true;
15473 unsigned Opc = TrueVal.getOpcode();
15474 switch (Opc) {
15475 default:
15476 return SDValue();
15477 case ISD::SHL:
15478 case ISD::SRA:
15479 case ISD::SRL:
15480 case ISD::SUB:
15481 Commutative = false;
15482 break;
15483 case ISD::ADD:
15484 case ISD::OR:
15485 case ISD::XOR:
15486 break;
15487 }
15488
15489 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15490 return SDValue();
15491
15492 unsigned OpToFold;
15493 if (FalseVal == TrueVal.getOperand(0))
15494 OpToFold = 0;
15495 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15496 OpToFold = 1;
15497 else
15498 return SDValue();
15499
15500 EVT VT = N->getValueType(0);
15501 SDLoc DL(N);
15502 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15503 EVT OtherOpVT = OtherOp.getValueType();
15504 SDValue IdentityOperand =
15505 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15506 if (!Commutative)
15507 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15508 assert(IdentityOperand && "No identity operand!");
15509
15510 if (Swapped)
15511 std::swap(OtherOp, IdentityOperand);
15512 SDValue NewSel =
15513 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15514 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15515}
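
// --- Editorial aside (not part of the original file): the scalar identity the
// fold above relies on, e.g. (select C, (add Y, X), Y) == (add Y, (select C, X, 0)),
// where 0 is the identity of the folded operation. The remaining select is a
// select against zero, which appears to be why the fold is skipped when the
// target prefers fused conditional moves (see hasConditionalMoveFusion below).
#include <cassert>
#include <cstdint>

static int64_t selectOfAdd(bool C, int64_t Y, int64_t X) {
  return C ? (Y + X) : Y;      // original: select between (add Y, X) and Y
}
static int64_t addOfSelect(bool C, int64_t Y, int64_t X) {
  return Y + (C ? X : 0);      // rewritten: one add, select of X against 0
}

int main() {
  for (bool C : {false, true})
    assert(selectOfAdd(C, 40, 2) == addOfSelect(C, 40, 2));
}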
15516
15517// This tries to get rid of `select` and `icmp` that are being used to handle
15518// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
15519 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15520 SDValue Cond = N->getOperand(0);
15521
15522 // This represents either CTTZ or CTLZ instruction.
15523 SDValue CountZeroes;
15524
15525 SDValue ValOnZero;
15526
15527 if (Cond.getOpcode() != ISD::SETCC)
15528 return SDValue();
15529
15530 if (!isNullConstant(Cond->getOperand(1)))
15531 return SDValue();
15532
15533 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15534 if (CCVal == ISD::CondCode::SETEQ) {
15535 CountZeroes = N->getOperand(2);
15536 ValOnZero = N->getOperand(1);
15537 } else if (CCVal == ISD::CondCode::SETNE) {
15538 CountZeroes = N->getOperand(1);
15539 ValOnZero = N->getOperand(2);
15540 } else {
15541 return SDValue();
15542 }
15543
15544 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15545 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15546 CountZeroes = CountZeroes.getOperand(0);
15547
15548 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15549 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15550 CountZeroes.getOpcode() != ISD::CTLZ &&
15551 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15552 return SDValue();
15553
15554 if (!isNullConstant(ValOnZero))
15555 return SDValue();
15556
15557 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15558 if (Cond->getOperand(0) != CountZeroesArgument)
15559 return SDValue();
15560
15561 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15562 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15563 CountZeroes.getValueType(), CountZeroesArgument);
15564 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15565 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15566 CountZeroes.getValueType(), CountZeroesArgument);
15567 }
15568
15569 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15570 SDValue BitWidthMinusOne =
15571 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15572
15573 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15574 CountZeroes, BitWidthMinusOne);
15575 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15576}
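
// --- Editorial aside (not part of the original file): the arithmetic used by
// the fold above. Zbb's ctz/clz return the bit width for a zero input, so
// "X == 0 ? 0 : cttz(X)" equals "cttz(X) & (BitWidth - 1)" and the compare
// plus select can be dropped. std::countr_zero (C++20) has the same zero
// behaviour and stands in for the target instruction here.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t X : {0x0ULL, 0x1ULL, 0x40ULL, 0x8000000000000000ULL}) {
    unsigned WithSelect = (X == 0) ? 0u : (unsigned)std::countr_zero(X);
    unsigned WithMask = (unsigned)std::countr_zero(X) & 63u;
    assert(WithSelect == WithMask);
  }
}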
15577
15578 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15579 const RISCVSubtarget &Subtarget) {
15580 SDValue Cond = N->getOperand(0);
15581 SDValue True = N->getOperand(1);
15582 SDValue False = N->getOperand(2);
15583 SDLoc DL(N);
15584 EVT VT = N->getValueType(0);
15585 EVT CondVT = Cond.getValueType();
15586
15587 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15588 return SDValue();
15589
15590 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15591 // BEXTI, where C is power of 2.
15592 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15593 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15594 SDValue LHS = Cond.getOperand(0);
15595 SDValue RHS = Cond.getOperand(1);
15596 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15597 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15598 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15599 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15600 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15601 return DAG.getSelect(DL, VT,
15602 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15603 False, True);
15604 }
15605 }
15606 return SDValue();
15607}
15608
15609 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15610 const RISCVSubtarget &Subtarget) {
15611 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15612 return Folded;
15613
15614 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15615 return V;
15616
15617 if (Subtarget.hasConditionalMoveFusion())
15618 return SDValue();
15619
15620 SDValue TrueVal = N->getOperand(1);
15621 SDValue FalseVal = N->getOperand(2);
15622 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15623 return V;
15624 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15625}
15626
15627/// If we have a build_vector where each lane is binop X, C, where C
15628/// is a constant (but not necessarily the same constant on all lanes),
15629/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15630/// We assume that materializing a constant build vector will be no more
15631 /// expensive than performing O(n) binops.
15632 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15633 const RISCVSubtarget &Subtarget,
15634 const RISCVTargetLowering &TLI) {
15635 SDLoc DL(N);
15636 EVT VT = N->getValueType(0);
15637
15638 assert(!VT.isScalableVector() && "unexpected build vector");
15639
15640 if (VT.getVectorNumElements() == 1)
15641 return SDValue();
15642
15643 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15644 if (!TLI.isBinOp(Opcode))
15645 return SDValue();
15646
15647 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15648 return SDValue();
15649
15650 // This BUILD_VECTOR involves an implicit truncation, and sinking
15651 // truncates through binops is non-trivial.
15652 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15653 return SDValue();
15654
15655 SmallVector<SDValue> LHSOps;
15656 SmallVector<SDValue> RHSOps;
15657 for (SDValue Op : N->ops()) {
15658 if (Op.isUndef()) {
15659 // We can't form a divide or remainder from undef.
15660 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15661 return SDValue();
15662
15663 LHSOps.push_back(Op);
15664 RHSOps.push_back(Op);
15665 continue;
15666 }
15667
15668 // TODO: We can handle operations which have a neutral rhs value
15669 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15670 // of profit in a more explicit manner.
15671 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15672 return SDValue();
15673
15674 LHSOps.push_back(Op.getOperand(0));
15675 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15676 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15677 return SDValue();
15678 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15679 // have different LHS and RHS types.
15680 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15681 return SDValue();
15682
15683 RHSOps.push_back(Op.getOperand(1));
15684 }
15685
15686 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15687 DAG.getBuildVector(VT, DL, RHSOps));
15688}
15689
15690 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15691 const RISCVSubtarget &Subtarget,
15692 const RISCVTargetLowering &TLI) {
15693 SDValue InVec = N->getOperand(0);
15694 SDValue InVal = N->getOperand(1);
15695 SDValue EltNo = N->getOperand(2);
15696 SDLoc DL(N);
15697
15698 EVT VT = InVec.getValueType();
15699 if (VT.isScalableVector())
15700 return SDValue();
15701
15702 if (!InVec.hasOneUse())
15703 return SDValue();
15704
15705 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15706 // move the insert_vector_elts into the arms of the binop. Note that
15707 // the new RHS must be a constant.
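// Informal sketch of the rewrite (operand values are only illustrative):
//   insert_vector_elt (add A, VecC), (add b, 7), Elt
//     -> add (insert_vector_elt A, b, Elt), (insert_vector_elt VecC, 7, Elt)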
15708 const unsigned InVecOpcode = InVec->getOpcode();
15709 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15710 InVal.hasOneUse()) {
15711 SDValue InVecLHS = InVec->getOperand(0);
15712 SDValue InVecRHS = InVec->getOperand(1);
15713 SDValue InValLHS = InVal->getOperand(0);
15714 SDValue InValRHS = InVal->getOperand(1);
15715
15716 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15717 return SDValue();
15718 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15719 return SDValue();
15720 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15721 // have different LHS and RHS types.
15722 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15723 return SDValue();
15724 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15725 InVecLHS, InValLHS, EltNo);
15726 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15727 InVecRHS, InValRHS, EltNo);
15728 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15729 }
15730
15731 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15732 // move the insert_vector_elt to the source operand of the concat_vector.
15733 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15734 return SDValue();
15735
15736 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15737 if (!IndexC)
15738 return SDValue();
15739 unsigned Elt = IndexC->getZExtValue();
15740
15741 EVT ConcatVT = InVec.getOperand(0).getValueType();
15742 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15743 return SDValue();
15744 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15745 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15746
15747 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15748 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15749 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15750 ConcatOp, InVal, NewIdx);
15751
15752 SmallVector<SDValue> ConcatOps;
15753 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15754 ConcatOps[ConcatOpIdx] = ConcatOp;
15755 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15756}
15757
15758// If we're concatenating a series of vector loads like
15759// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15760// Then we can turn this into a strided load by widening the vector elements
15761// vlse32 p, stride=n
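// Informal example: four v4i8 loads at p, p+n, p+2*n and p+3*n can each be
// viewed as a single i32 element, so the concat becomes (roughly) a v4i32
// strided load with stride n; the exact vsetvli/LMUL emitted depends on the
// subtarget, so this is only a sketch of the idea.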
15762 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15763 const RISCVSubtarget &Subtarget,
15764 const RISCVTargetLowering &TLI) {
15765 SDLoc DL(N);
15766 EVT VT = N->getValueType(0);
15767
15768 // Only perform this combine on legal MVTs.
15769 if (!TLI.isTypeLegal(VT))
15770 return SDValue();
15771
15772 // TODO: Potentially extend this to scalable vectors
15773 if (VT.isScalableVector())
15774 return SDValue();
15775
15776 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15777 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15778 !SDValue(BaseLd, 0).hasOneUse())
15779 return SDValue();
15780
15781 EVT BaseLdVT = BaseLd->getValueType(0);
15782
15783 // Go through the loads and check that they're strided
15784 SmallVector<LoadSDNode *> Lds;
15785 Lds.push_back(BaseLd);
15786 Align Align = BaseLd->getAlign();
15787 for (SDValue Op : N->ops().drop_front()) {
15788 auto *Ld = dyn_cast<LoadSDNode>(Op);
15789 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15790 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15791 Ld->getValueType(0) != BaseLdVT)
15792 return SDValue();
15793
15794 Lds.push_back(Ld);
15795
15796 // The common alignment is the most restrictive (smallest) of all the loads
15797 Align = std::min(Align, Ld->getAlign());
15798 }
15799
15800 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15801 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15802 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15803 // If the load ptrs can be decomposed into a common (Base + Index) with a
15804 // common constant stride, then return the constant stride.
15805 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15806 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15807 if (BIO1.equalBaseIndex(BIO2, DAG))
15808 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15809
15810 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15811 SDValue P1 = Ld1->getBasePtr();
15812 SDValue P2 = Ld2->getBasePtr();
15813 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15814 return {{P2.getOperand(1), false}};
15815 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15816 return {{P1.getOperand(1), true}};
15817
15818 return std::nullopt;
15819 };
15820
15821 // Get the distance between the first and second loads
15822 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15823 if (!BaseDiff)
15824 return SDValue();
15825
15826 // Check all the loads are the same distance apart
15827 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15828 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15829 return SDValue();
15830
15831 // TODO: At this point, we've successfully matched a generalized gather
15832 // load. Maybe we should emit that, and then move the specialized
15833 // matchers above and below into a DAG combine?
15834
15835 // Get the widened scalar type, e.g. v4i8 -> i64
15836 unsigned WideScalarBitWidth =
15837 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15838 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15839
15840 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
15841 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15842 if (!TLI.isTypeLegal(WideVecVT))
15843 return SDValue();
15844
15845 // Check that the operation is legal
15846 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15847 return SDValue();
15848
15849 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15850 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15851 ? std::get<SDValue>(StrideVariant)
15852 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15853 Lds[0]->getOffset().getValueType());
15854 if (MustNegateStride)
15855 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15856
15857 SDValue AllOneMask =
15858 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15859 DAG.getConstant(1, DL, MVT::i1));
15860
15861 uint64_t MemSize;
15862 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15863 ConstStride && ConstStride->getSExtValue() >= 0)
15864 // total size = (elsize * n) + (stride - elsize) * (n-1)
15865 // = elsize + stride * (n-1)
15866 MemSize = WideScalarVT.getSizeInBits() +
15867 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15868 else
15869 // If Stride isn't constant, then we can't know how much it will load
15870 MemSize = MemoryLocation::UnknownSize;
15871
15872 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15873 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15874 Align);
15875
15876 SDValue StridedLoad = DAG.getStridedLoadVP(
15877 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
15878 AllOneMask,
15879 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
15880
15881 for (SDValue Ld : N->ops())
15882 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15883
15884 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15885}
15886
15887 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15888 const RISCVSubtarget &Subtarget) {
15889
15890 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15891
15892 if (N->getValueType(0).isFixedLengthVector())
15893 return SDValue();
15894
15895 SDValue Addend = N->getOperand(0);
15896 SDValue MulOp = N->getOperand(1);
15897
15898 if (N->getOpcode() == RISCVISD::ADD_VL) {
15899 SDValue AddPassthruOp = N->getOperand(2);
15900 if (!AddPassthruOp.isUndef())
15901 return SDValue();
15902 }
15903
15904 auto IsVWMulOpc = [](unsigned Opc) {
15905 switch (Opc) {
15906 case RISCVISD::VWMUL_VL:
15907 case RISCVISD::VWMULU_VL:
15908 case RISCVISD::VWMULSU_VL:
15909 return true;
15910 default:
15911 return false;
15912 }
15913 };
15914
15915 if (!IsVWMulOpc(MulOp.getOpcode()))
15916 std::swap(Addend, MulOp);
15917
15918 if (!IsVWMulOpc(MulOp.getOpcode()))
15919 return SDValue();
15920
15921 SDValue MulPassthruOp = MulOp.getOperand(2);
15922
15923 if (!MulPassthruOp.isUndef())
15924 return SDValue();
15925
15926 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15927 const RISCVSubtarget &Subtarget) {
15928 if (N->getOpcode() == ISD::ADD) {
15929 SDLoc DL(N);
15930 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15931 Subtarget);
15932 }
15933 return std::make_pair(N->getOperand(3), N->getOperand(4));
15934 }(N, DAG, Subtarget);
15935
15936 SDValue MulMask = MulOp.getOperand(3);
15937 SDValue MulVL = MulOp.getOperand(4);
15938
15939 if (AddMask != MulMask || AddVL != MulVL)
15940 return SDValue();
15941
15942 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15943 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15944 "Unexpected opcode after VWMACC_VL");
15945 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15946 "Unexpected opcode after VWMACC_VL!");
15947 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15948 "Unexpected opcode after VWMUL_VL!");
15949 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15950 "Unexpected opcode after VWMUL_VL!");
15951
15952 SDLoc DL(N);
15953 EVT VT = N->getValueType(0);
15954 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15955 AddVL};
15956 return DAG.getNode(Opc, DL, VT, Ops);
15957}
15958
15959 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15960 ISD::MemIndexType &IndexType,
15961 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15962 if (!DCI.isBeforeLegalize())
15963 return false;
15964
15965 SelectionDAG &DAG = DCI.DAG;
15966 const MVT XLenVT =
15967 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15968
15969 const EVT IndexVT = Index.getValueType();
15970
15971 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15972 // mode, so anything else must be manually legalized.
15973 if (!isIndexTypeSigned(IndexType))
15974 return false;
15975
15976 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15977 // Any index legalization should first promote to XLenVT, so we don't lose
15978 // bits when scaling. This may create an illegal index type so we let
15979 // LLVM's legalization take care of the splitting.
15980 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15981 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15982 IndexVT.changeVectorElementType(XLenVT), Index);
15983 }
15984 IndexType = ISD::UNSIGNED_SCALED;
15985 return true;
15986}
15987
15988/// Match the index vector of a scatter or gather node as the shuffle mask
15989/// which performs the rearrangement if possible. Will only match if
15990/// all lanes are touched, and thus replacing the scatter or gather with
15991/// a unit strided access and shuffle is legal.
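///
/// Informal example: a v4i32 gather with an all-ones mask and byte offsets
/// (4, 0, 12, 8) touches every lane exactly once, so it can be rewritten as a
/// unit-strided load followed by the shuffle mask (1, 0, 3, 2).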
15992 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15993 SmallVector<int> &ShuffleMask) {
15994 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15995 return false;
15996 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15997 return false;
15998
15999 const unsigned ElementSize = VT.getScalarStoreSize();
16000 const unsigned NumElems = VT.getVectorNumElements();
16001
16002 // Create the shuffle mask and check all bits active
16003 assert(ShuffleMask.empty());
16004 BitVector ActiveLanes(NumElems);
16005 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16006 // TODO: We've found an active bit of UB, and could be
16007 // more aggressive here if desired.
16008 if (Index->getOperand(i)->isUndef())
16009 return false;
16010 uint64_t C = Index->getConstantOperandVal(i);
16011 if (C % ElementSize != 0)
16012 return false;
16013 C = C / ElementSize;
16014 if (C >= NumElems)
16015 return false;
16016 ShuffleMask.push_back(C);
16017 ActiveLanes.set(C);
16018 }
16019 return ActiveLanes.all();
16020}
16021
16022/// Match the index of a gather or scatter operation as an operation
16023/// with twice the element width and half the number of elements. This is
16024/// generally profitable (if legal) because these operations are linear
16025/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
16026/// come out ahead.
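///
/// Informal example: a v8i16 gather whose byte offsets are
/// (0, 2, 8, 10, 16, 18, 24, 26) reads adjacent element pairs, so it can be
/// treated as a v4i32 gather with byte offsets (0, 8, 16, 24), halving the
/// number of indexed lanes.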
16027 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
16028 Align BaseAlign, const RISCVSubtarget &ST) {
16029 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16030 return false;
16031 if (Index.getOpcode() != ISD::BUILD_VECTOR)
16032 return false;
16033
16034 // Attempt a doubling. If we can use an element type 4x or 8x in
16035 // size, this will happen via multiple iterations of the transform.
16036 const unsigned NumElems = VT.getVectorNumElements();
16037 if (NumElems % 2 != 0)
16038 return false;
16039
16040 const unsigned ElementSize = VT.getScalarStoreSize();
16041 const unsigned WiderElementSize = ElementSize * 2;
16042 if (WiderElementSize > ST.getELen()/8)
16043 return false;
16044
16045 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16046 return false;
16047
16048 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16049 // TODO: We've found an active bit of UB, and could be
16050 // more aggressive here if desired.
16051 if (Index->getOperand(i)->isUndef())
16052 return false;
16053 // TODO: This offset check is too strict if we support fully
16054 // misaligned memory operations.
16055 uint64_t C = Index->getConstantOperandVal(i);
16056 if (i % 2 == 0) {
16057 if (C % WiderElementSize != 0)
16058 return false;
16059 continue;
16060 }
16061 uint64_t Last = Index->getConstantOperandVal(i-1);
16062 if (C != Last + ElementSize)
16063 return false;
16064 }
16065 return true;
16066}
16067
16068// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16069 // This is beneficial for the cases where X and Y are both the same
16070 // low-precision vector value type. Since the truncate would be lowered into
16071// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16072// restriction, such pattern would be expanded into a series of "vsetvli"
16073// and "vnsrl" instructions later to reach this point.
16074 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
16075 SDValue Mask = N->getOperand(1);
16076 SDValue VL = N->getOperand(2);
16077
16078 bool IsVLMAX = isAllOnesConstant(VL) ||
16079 (isa<RegisterSDNode>(VL) &&
16080 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16081 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
16082 Mask.getOperand(0) != VL)
16083 return SDValue();
16084
16085 auto IsTruncNode = [&](SDValue V) {
16086 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16087 V.getOperand(1) == Mask && V.getOperand(2) == VL;
16088 };
16089
16090 SDValue Op = N->getOperand(0);
16091
16092 // We first need to find the innermost TRUNCATE_VECTOR_VL node
16093 // in order to recognize such a pattern.
16094 while (IsTruncNode(Op)) {
16095 if (!Op.hasOneUse())
16096 return SDValue();
16097 Op = Op.getOperand(0);
16098 }
16099
16100 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
16101 return SDValue();
16102
16103 SDValue N0 = Op.getOperand(0);
16104 SDValue N1 = Op.getOperand(1);
16105 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
16106 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
16107 return SDValue();
16108
16109 SDValue N00 = N0.getOperand(0);
16110 SDValue N10 = N1.getOperand(0);
16111 if (!N00.getValueType().isVector() ||
16112 N00.getValueType() != N10.getValueType() ||
16113 N->getValueType(0) != N10.getValueType())
16114 return SDValue();
16115
16116 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16117 SDValue SMin =
16118 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16119 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16120 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16121}
16122
16123// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
16124// maximum value for the truncated type.
16125// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
16126// is the signed maximum value for the truncated type and C2 is the signed
16127// minimum value.
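// Informal example, truncating from i16 elements to i8 elements:
//   (truncate_vector_vl (umin X, 255))              -> vnclipu
//   (truncate_vector_vl (smin (smax X, -128), 127)) -> vnclip
// where 255, 127 and -128 are the unsigned/signed extremes of the narrow type.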
16128 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
16129 const RISCVSubtarget &Subtarget) {
16130 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
16131
16132 MVT VT = N->getSimpleValueType(0);
16133
16134 SDValue Mask = N->getOperand(1);
16135 SDValue VL = N->getOperand(2);
16136
16137 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
16138 APInt &SplatVal) {
16139 if (V.getOpcode() != Opc &&
16140 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
16141 V.getOperand(3) == Mask && V.getOperand(4) == VL))
16142 return SDValue();
16143
16144 SDValue Op = V.getOperand(1);
16145
16146 // Peek through conversion between fixed and scalable vectors.
16147 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
16148 isNullConstant(Op.getOperand(2)) &&
16149 Op.getOperand(1).getValueType().isFixedLengthVector() &&
16150 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16151 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
16152 isNullConstant(Op.getOperand(1).getOperand(1)))
16153 Op = Op.getOperand(1).getOperand(0);
16154
16155 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
16156 return V.getOperand(0);
16157
16158 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
16159 Op.getOperand(2) == VL) {
16160 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
16161 SplatVal =
16162 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
16163 return V.getOperand(0);
16164 }
16165 }
16166
16167 return SDValue();
16168 };
16169
16170 SDLoc DL(N);
16171
16172 auto DetectUSatPattern = [&](SDValue V) {
16173 APInt LoC, HiC;
16174
16175 // Simple case, V is a UMIN.
16176 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16177 if (HiC.isMask(VT.getScalarSizeInBits()))
16178 return UMinOp;
16179
16180 // If we have an SMAX that removes negative numbers first, then we can match
16181 // SMIN instead of UMIN.
16182 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16183 if (SDValue SMaxOp =
16184 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16185 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16186 return SMinOp;
16187
16188 // If we have an SMIN before an SMAX and the SMAX constant is less than or
16189 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16190 // first.
16191 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16192 if (SDValue SMinOp =
16193 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16194 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16195 HiC.uge(LoC))
16196 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
16197 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16198 Mask, VL);
16199
16200 return SDValue();
16201 };
16202
16203 auto DetectSSatPattern = [&](SDValue V) {
16204 unsigned NumDstBits = VT.getScalarSizeInBits();
16205 unsigned NumSrcBits = V.getScalarValueSizeInBits();
16206 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16207 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16208
16209 APInt HiC, LoC;
16210 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16211 if (SDValue SMaxOp =
16212 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16213 if (HiC == SignedMax && LoC == SignedMin)
16214 return SMaxOp;
16215
16216 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16217 if (SDValue SMinOp =
16218 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16219 if (HiC == SignedMax && LoC == SignedMin)
16220 return SMinOp;
16221
16222 return SDValue();
16223 };
16224
16225 SDValue Src = N->getOperand(0);
16226
16227 // Look through multiple layers of truncates.
16228 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16229 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
16230 Src.hasOneUse())
16231 Src = Src.getOperand(0);
16232
16233 SDValue Val;
16234 unsigned ClipOpc;
16235 if ((Val = DetectUSatPattern(Src)))
16236 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
16237 else if ((Val = DetectSSatPattern(Src)))
16238 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
16239 else
16240 return SDValue();
16241
16242 MVT ValVT = Val.getSimpleValueType();
16243
16244 do {
16245 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
16246 ValVT = ValVT.changeVectorElementType(ValEltVT);
16247 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
16248 } while (ValVT != VT);
16249
16250 return Val;
16251}
16252
16253 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
16254 DAGCombinerInfo &DCI) const {
16255 SelectionDAG &DAG = DCI.DAG;
16256 const MVT XLenVT = Subtarget.getXLenVT();
16257 SDLoc DL(N);
16258
16259 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16260 // bits are demanded. N will be added to the Worklist if it was not deleted.
16261 // Caller should return SDValue(N, 0) if this returns true.
16262 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16263 SDValue Op = N->getOperand(OpNo);
16264 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16265 if (!SimplifyDemandedBits(Op, Mask, DCI))
16266 return false;
16267
16268 if (N->getOpcode() != ISD::DELETED_NODE)
16269 DCI.AddToWorklist(N);
16270 return true;
16271 };
16272
16273 switch (N->getOpcode()) {
16274 default:
16275 break;
16276 case RISCVISD::SplitF64: {
16277 SDValue Op0 = N->getOperand(0);
16278 // If the input to SplitF64 is just BuildPairF64 then the operation is
16279 // redundant. Instead, use BuildPairF64's operands directly.
16280 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16281 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16282
16283 if (Op0->isUndef()) {
16284 SDValue Lo = DAG.getUNDEF(MVT::i32);
16285 SDValue Hi = DAG.getUNDEF(MVT::i32);
16286 return DCI.CombineTo(N, Lo, Hi);
16287 }
16288
16289 // It's cheaper to materialise two 32-bit integers than to load a double
16290 // from the constant pool and transfer it to integer registers through the
16291 // stack.
16292 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16293 APInt V = C->getValueAPF().bitcastToAPInt();
16294 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16295 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16296 return DCI.CombineTo(N, Lo, Hi);
16297 }
16298
16299 // This is a target-specific version of a DAGCombine performed in
16300 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16301 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16302 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16303 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16304 !Op0.getNode()->hasOneUse())
16305 break;
16306 SDValue NewSplitF64 =
16307 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16308 Op0.getOperand(0));
16309 SDValue Lo = NewSplitF64.getValue(0);
16310 SDValue Hi = NewSplitF64.getValue(1);
16311 APInt SignBit = APInt::getSignMask(32);
16312 if (Op0.getOpcode() == ISD::FNEG) {
16313 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16314 DAG.getConstant(SignBit, DL, MVT::i32));
16315 return DCI.CombineTo(N, Lo, NewHi);
16316 }
16317 assert(Op0.getOpcode() == ISD::FABS);
16318 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16319 DAG.getConstant(~SignBit, DL, MVT::i32));
16320 return DCI.CombineTo(N, Lo, NewHi);
16321 }
16322 case RISCVISD::SLLW:
16323 case RISCVISD::SRAW:
16324 case RISCVISD::SRLW:
16325 case RISCVISD::RORW:
16326 case RISCVISD::ROLW: {
16327 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16328 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16329 SimplifyDemandedLowBitsHelper(1, 5))
16330 return SDValue(N, 0);
16331
16332 break;
16333 }
16334 case RISCVISD::CLZW:
16335 case RISCVISD::CTZW: {
16336 // Only the lower 32 bits of the first operand are read
16337 if (SimplifyDemandedLowBitsHelper(0, 32))
16338 return SDValue(N, 0);
16339 break;
16340 }
16341 case RISCVISD::FMV_W_X_RV64: {
16342 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16343 // conversion is unnecessary and can be replaced with the
16344 // FMV_X_ANYEXTW_RV64 operand.
16345 SDValue Op0 = N->getOperand(0);
16346 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
16347 return Op0.getOperand(0);
16348 break;
16349 }
16350 case RISCVISD::FMV_X_ANYEXTH:
16351 case RISCVISD::FMV_X_ANYEXTW_RV64: {
16352 SDLoc DL(N);
16353 SDValue Op0 = N->getOperand(0);
16354 MVT VT = N->getSimpleValueType(0);
16355 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16356 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16357 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16358 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16359 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16360 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16361 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16362 assert(Op0.getOperand(0).getValueType() == VT &&
16363 "Unexpected value type!");
16364 return Op0.getOperand(0);
16365 }
16366
16367 // This is a target-specific version of a DAGCombine performed in
16368 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16369 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16370 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16371 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16372 !Op0.getNode()->hasOneUse())
16373 break;
16374 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16375 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16376 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16377 if (Op0.getOpcode() == ISD::FNEG)
16378 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16379 DAG.getConstant(SignBit, DL, VT));
16380
16381 assert(Op0.getOpcode() == ISD::FABS);
16382 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16383 DAG.getConstant(~SignBit, DL, VT));
16384 }
16385 case ISD::ABS: {
16386 EVT VT = N->getValueType(0);
16387 SDValue N0 = N->getOperand(0);
16388 // abs (sext) -> zext (abs)
16389 // abs (zext) -> zext (handled elsewhere)
16390 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16391 SDValue Src = N0.getOperand(0);
16392 SDLoc DL(N);
16393 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16394 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16395 }
16396 break;
16397 }
16398 case ISD::ADD: {
16399 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16400 return V;
16401 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16402 return V;
16403 return performADDCombine(N, DCI, Subtarget);
16404 }
16405 case ISD::SUB: {
16406 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16407 return V;
16408 return performSUBCombine(N, DAG, Subtarget);
16409 }
16410 case ISD::AND:
16411 return performANDCombine(N, DCI, Subtarget);
16412 case ISD::OR: {
16413 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16414 return V;
16415 return performORCombine(N, DCI, Subtarget);
16416 }
16417 case ISD::XOR:
16418 return performXORCombine(N, DAG, Subtarget);
16419 case ISD::MUL:
16420 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16421 return V;
16422 return performMULCombine(N, DAG, DCI, Subtarget);
16423 case ISD::SDIV:
16424 case ISD::UDIV:
16425 case ISD::SREM:
16426 case ISD::UREM:
16427 if (SDValue V = combineBinOpOfZExt(N, DAG))
16428 return V;
16429 break;
16430 case ISD::FMUL: {
16431 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
16432 SDValue N0 = N->getOperand(0);
16433 SDValue N1 = N->getOperand(1);
16434 if (N0->getOpcode() != ISD::FCOPYSIGN)
16435 std::swap(N0, N1);
16436 if (N0->getOpcode() != ISD::FCOPYSIGN)
16437 return SDValue();
16438 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
16439 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
16440 return SDValue();
16441 EVT VT = N->getValueType(0);
16442 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
16443 return SDValue();
16444 SDValue Sign = N0->getOperand(1);
16445 if (Sign.getValueType() != VT)
16446 return SDValue();
16447 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
16448 }
16449 case ISD::FADD:
16450 case ISD::UMAX:
16451 case ISD::UMIN:
16452 case ISD::SMAX:
16453 case ISD::SMIN:
16454 case ISD::FMAXNUM:
16455 case ISD::FMINNUM: {
16456 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16457 return V;
16458 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16459 return V;
16460 return SDValue();
16461 }
16462 case ISD::SETCC:
16463 return performSETCCCombine(N, DAG, Subtarget);
16464 case ISD::SIGN_EXTEND_INREG:
16465 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16466 case ISD::ZERO_EXTEND:
16467 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16468 // type legalization. This is safe because fp_to_uint produces poison if
16469 // it overflows.
16470 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16471 SDValue Src = N->getOperand(0);
16472 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16473 isTypeLegal(Src.getOperand(0).getValueType()))
16474 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16475 Src.getOperand(0));
16476 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16477 isTypeLegal(Src.getOperand(1).getValueType())) {
16478 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16479 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16480 Src.getOperand(0), Src.getOperand(1));
16481 DCI.CombineTo(N, Res);
16482 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16483 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16484 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16485 }
16486 }
16487 return SDValue();
16488 case RISCVISD::TRUNCATE_VECTOR_VL:
16489 if (SDValue V = combineTruncOfSraSext(N, DAG))
16490 return V;
16491 return combineTruncToVnclip(N, DAG, Subtarget);
16492 case ISD::TRUNCATE:
16493 return performTRUNCATECombine(N, DAG, Subtarget);
16494 case ISD::SELECT:
16495 return performSELECTCombine(N, DAG, Subtarget);
16496 case RISCVISD::CZERO_EQZ:
16497 case RISCVISD::CZERO_NEZ: {
16498 SDValue Val = N->getOperand(0);
16499 SDValue Cond = N->getOperand(1);
16500
16501 unsigned Opc = N->getOpcode();
16502
16503 // czero_eqz x, x -> x
16504 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16505 return Val;
16506
16507 unsigned InvOpc =
16508 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16509
16510 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16511 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16512 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16513 SDValue NewCond = Cond.getOperand(0);
16514 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16515 if (DAG.MaskedValueIsZero(NewCond, Mask))
16516 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16517 }
16518 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16519 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16520 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16521 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16522 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16523 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16524 if (ISD::isIntEqualitySetCC(CCVal))
16525 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16526 N->getValueType(0), Val, Cond.getOperand(0));
16527 }
16528 return SDValue();
16529 }
16530 case RISCVISD::SELECT_CC: {
16531 // Transform
16532 SDValue LHS = N->getOperand(0);
16533 SDValue RHS = N->getOperand(1);
16534 SDValue CC = N->getOperand(2);
16535 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16536 SDValue TrueV = N->getOperand(3);
16537 SDValue FalseV = N->getOperand(4);
16538 SDLoc DL(N);
16539 EVT VT = N->getValueType(0);
16540
16541 // If the True and False values are the same, we don't need a select_cc.
16542 if (TrueV == FalseV)
16543 return TrueV;
16544
16545 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16546 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
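// Worked example (informal): select (x < 0), 3, 5 becomes
//   ((x >> (XLEN - 1)) & (3 - 5)) + 5
// which yields 3 when the arithmetic shift produces an all-ones mask, and 5
// otherwise.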
16547 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16548 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16549 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16550 if (CCVal == ISD::CondCode::SETGE)
16551 std::swap(TrueV, FalseV);
16552
16553 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16554 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16555 // Only handle simm12; if it is not in this range, it can be considered
16556 // as a register.
16557 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16558 isInt<12>(TrueSImm - FalseSImm)) {
16559 SDValue SRA =
16560 DAG.getNode(ISD::SRA, DL, VT, LHS,
16561 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16562 SDValue AND =
16563 DAG.getNode(ISD::AND, DL, VT, SRA,
16564 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16565 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16566 }
16567
16568 if (CCVal == ISD::CondCode::SETGE)
16569 std::swap(TrueV, FalseV);
16570 }
16571
16572 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16573 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16574 {LHS, RHS, CC, TrueV, FalseV});
16575
16576 if (!Subtarget.hasConditionalMoveFusion()) {
16577 // (select c, -1, y) -> -c | y
16578 if (isAllOnesConstant(TrueV)) {
16579 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16580 SDValue Neg = DAG.getNegative(C, DL, VT);
16581 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16582 }
16583 // (select c, y, -1) -> -!c | y
16584 if (isAllOnesConstant(FalseV)) {
16585 SDValue C =
16586 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16587 SDValue Neg = DAG.getNegative(C, DL, VT);
16588 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16589 }
16590
16591 // (select c, 0, y) -> -!c & y
16592 if (isNullConstant(TrueV)) {
16593 SDValue C =
16594 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16595 SDValue Neg = DAG.getNegative(C, DL, VT);
16596 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16597 }
16598 // (select c, y, 0) -> -c & y
16599 if (isNullConstant(FalseV)) {
16600 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16601 SDValue Neg = DAG.getNegative(C, DL, VT);
16602 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16603 }
16604 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16605 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16606 if (((isOneConstant(FalseV) && LHS == TrueV &&
16607 CCVal == ISD::CondCode::SETNE) ||
16608 (isOneConstant(TrueV) && LHS == FalseV &&
16609 CCVal == ISD::CondCode::SETEQ)) &&
16610 isNullConstant(RHS)) {
16611 // freeze it to be safe.
16612 LHS = DAG.getFreeze(LHS);
16613 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16614 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16615 }
16616 }
16617
16618 // If both true/false are an xor with 1, pull through the select.
16619 // This can occur after op legalization if both operands are setccs that
16620 // require an xor to invert.
16621 // FIXME: Generalize to other binary ops with identical operand?
16622 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16623 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16624 isOneConstant(TrueV.getOperand(1)) &&
16625 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16626 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16627 TrueV.getOperand(0), FalseV.getOperand(0));
16628 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16629 }
16630
16631 return SDValue();
16632 }
16633 case RISCVISD::BR_CC: {
16634 SDValue LHS = N->getOperand(1);
16635 SDValue RHS = N->getOperand(2);
16636 SDValue CC = N->getOperand(3);
16637 SDLoc DL(N);
16638
16639 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16640 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16641 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16642
16643 return SDValue();
16644 }
16645 case ISD::BITREVERSE:
16646 return performBITREVERSECombine(N, DAG, Subtarget);
16647 case ISD::FP_TO_SINT:
16648 case ISD::FP_TO_UINT:
16649 return performFP_TO_INTCombine(N, DCI, Subtarget);
16650 case ISD::FP_TO_SINT_SAT:
16651 case ISD::FP_TO_UINT_SAT:
16652 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16653 case ISD::FCOPYSIGN: {
16654 EVT VT = N->getValueType(0);
16655 if (!VT.isVector())
16656 break;
16657 // There is a form of VFSGNJ which injects the negated sign of its second
16658 // operand. Try and bubble any FNEG up after the extend/round to produce
16659 // this optimized pattern. Avoid modifying cases where FP_ROUND has
16660 // TRUNC=1.
16661 SDValue In2 = N->getOperand(1);
16662 // Avoid cases where the extend/round has multiple uses, as duplicating
16663 // those is typically more expensive than removing a fneg.
16664 if (!In2.hasOneUse())
16665 break;
16666 if (In2.getOpcode() != ISD::FP_EXTEND &&
16667 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16668 break;
16669 In2 = In2.getOperand(0);
16670 if (In2.getOpcode() != ISD::FNEG)
16671 break;
16672 SDLoc DL(N);
16673 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16674 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16675 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16676 }
16677 case ISD::MGATHER: {
16678 const auto *MGN = cast<MaskedGatherSDNode>(N);
16679 const EVT VT = N->getValueType(0);
16680 SDValue Index = MGN->getIndex();
16681 SDValue ScaleOp = MGN->getScale();
16682 ISD::MemIndexType IndexType = MGN->getIndexType();
16683 assert(!MGN->isIndexScaled() &&
16684 "Scaled gather/scatter should not be formed");
16685
16686 SDLoc DL(N);
16687 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16688 return DAG.getMaskedGather(
16689 N->getVTList(), MGN->getMemoryVT(), DL,
16690 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16691 MGN->getBasePtr(), Index, ScaleOp},
16692 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16693
16694 if (narrowIndex(Index, IndexType, DAG))
16695 return DAG.getMaskedGather(
16696 N->getVTList(), MGN->getMemoryVT(), DL,
16697 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16698 MGN->getBasePtr(), Index, ScaleOp},
16699 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16700
16701 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16702 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16703 // The sequence will be XLenVT, not the type of Index. Tell
16704 // isSimpleVIDSequence this so we avoid overflow.
16705 if (std::optional<VIDSequence> SimpleVID =
16706 isSimpleVIDSequence(Index, Subtarget.getXLen());
16707 SimpleVID && SimpleVID->StepDenominator == 1) {
16708 const int64_t StepNumerator = SimpleVID->StepNumerator;
16709 const int64_t Addend = SimpleVID->Addend;
16710
16711 // Note: We don't need to check alignment here since (by assumption
16712 // from the existence of the gather), our offsets must be sufficiently
16713 // aligned.
16714
16715 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16716 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16717 assert(IndexType == ISD::UNSIGNED_SCALED);
16718 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16719 DAG.getConstant(Addend, DL, PtrVT));
16720
16721 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
16722 VT.getVectorElementCount());
16723 SDValue StridedLoad =
16724 DAG.getStridedLoadVP(VT, DL, MGN->getChain(), BasePtr,
16725 DAG.getConstant(StepNumerator, DL, XLenVT),
16726 MGN->getMask(), EVL, MGN->getMemOperand());
16727 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
16728 StridedLoad, MGN->getPassThru(), EVL);
16729 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
16730 DL);
16731 }
16732 }
16733
16734 SmallVector<int> ShuffleMask;
16735 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16736 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16737 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16738 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16739 MGN->getMask(), DAG.getUNDEF(VT),
16740 MGN->getMemoryVT(), MGN->getMemOperand(),
16741 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16742 SDValue Shuffle =
16743 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16744 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16745 }
16746
16747 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16748 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16749 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16750 SmallVector<SDValue> NewIndices;
16751 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16752 NewIndices.push_back(Index.getOperand(i));
16753 EVT IndexVT = Index.getValueType()
16754 .getHalfNumVectorElementsVT(*DAG.getContext());
16755 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16756
16757 unsigned ElementSize = VT.getScalarStoreSize();
16758 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16759 auto EltCnt = VT.getVectorElementCount();
16760 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16761 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16762 EltCnt.divideCoefficientBy(2));
16763 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16764 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16765 EltCnt.divideCoefficientBy(2));
16766 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16767
16768 SDValue Gather =
16769 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16770 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16771 Index, ScaleOp},
16772 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16773 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16774 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16775 }
16776 break;
16777 }
16778 case ISD::MSCATTER:{
16779 const auto *MSN = cast<MaskedScatterSDNode>(N);
16780 SDValue Index = MSN->getIndex();
16781 SDValue ScaleOp = MSN->getScale();
16782 ISD::MemIndexType IndexType = MSN->getIndexType();
16783 assert(!MSN->isIndexScaled() &&
16784 "Scaled gather/scatter should not be formed");
16785
16786 SDLoc DL(N);
16787 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16788 return DAG.getMaskedScatter(
16789 N->getVTList(), MSN->getMemoryVT(), DL,
16790 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16791 Index, ScaleOp},
16792 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16793
16794 if (narrowIndex(Index, IndexType, DAG))
16795 return DAG.getMaskedScatter(
16796 N->getVTList(), MSN->getMemoryVT(), DL,
16797 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16798 Index, ScaleOp},
16799 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16800
16801 EVT VT = MSN->getValue()->getValueType(0);
16802 SmallVector<int> ShuffleMask;
16803 if (!MSN->isTruncatingStore() &&
16804 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16805 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16806 DAG.getUNDEF(VT), ShuffleMask);
16807 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16808 DAG.getUNDEF(XLenVT), MSN->getMask(),
16809 MSN->getMemoryVT(), MSN->getMemOperand(),
16810 ISD::UNINDEXED, false);
16811 }
16812 break;
16813 }
16814 case ISD::VP_GATHER: {
16815 const auto *VPGN = cast<VPGatherSDNode>(N);
16816 SDValue Index = VPGN->getIndex();
16817 SDValue ScaleOp = VPGN->getScale();
16818 ISD::MemIndexType IndexType = VPGN->getIndexType();
16819 assert(!VPGN->isIndexScaled() &&
16820 "Scaled gather/scatter should not be formed");
16821
16822 SDLoc DL(N);
16823 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16824 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16825 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16826 ScaleOp, VPGN->getMask(),
16827 VPGN->getVectorLength()},
16828 VPGN->getMemOperand(), IndexType);
16829
16830 if (narrowIndex(Index, IndexType, DAG))
16831 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16832 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16833 ScaleOp, VPGN->getMask(),
16834 VPGN->getVectorLength()},
16835 VPGN->getMemOperand(), IndexType);
16836
16837 break;
16838 }
16839 case ISD::VP_SCATTER: {
16840 const auto *VPSN = cast<VPScatterSDNode>(N);
16841 SDValue Index = VPSN->getIndex();
16842 SDValue ScaleOp = VPSN->getScale();
16843 ISD::MemIndexType IndexType = VPSN->getIndexType();
16844 assert(!VPSN->isIndexScaled() &&
16845 "Scaled gather/scatter should not be formed");
16846
16847 SDLoc DL(N);
16848 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16849 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16850 {VPSN->getChain(), VPSN->getValue(),
16851 VPSN->getBasePtr(), Index, ScaleOp,
16852 VPSN->getMask(), VPSN->getVectorLength()},
16853 VPSN->getMemOperand(), IndexType);
16854
16855 if (narrowIndex(Index, IndexType, DAG))
16856 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16857 {VPSN->getChain(), VPSN->getValue(),
16858 VPSN->getBasePtr(), Index, ScaleOp,
16859 VPSN->getMask(), VPSN->getVectorLength()},
16860 VPSN->getMemOperand(), IndexType);
16861 break;
16862 }
16863 case RISCVISD::SHL_VL:
16864 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16865 return V;
16866 [[fallthrough]];
16867 case RISCVISD::SRA_VL:
16868 case RISCVISD::SRL_VL: {
16869 SDValue ShAmt = N->getOperand(1);
16870 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16871 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16872 SDLoc DL(N);
16873 SDValue VL = N->getOperand(4);
16874 EVT VT = N->getValueType(0);
16875 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16876 ShAmt.getOperand(1), VL);
16877 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16878 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16879 }
16880 break;
16881 }
16882 case ISD::SRA:
16883 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16884 return V;
16885 [[fallthrough]];
16886 case ISD::SRL:
16887 case ISD::SHL: {
16888 if (N->getOpcode() == ISD::SHL) {
16889 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16890 return V;
16891 }
16892 SDValue ShAmt = N->getOperand(1);
16893 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16894 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16895 SDLoc DL(N);
16896 EVT VT = N->getValueType(0);
16897 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16898 ShAmt.getOperand(1),
16899 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16900 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16901 }
16902 break;
16903 }
16904 case RISCVISD::ADD_VL:
16905 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16906 return V;
16907 return combineToVWMACC(N, DAG, Subtarget);
16908 case RISCVISD::VWADD_W_VL:
16909 case RISCVISD::VWADDU_W_VL:
16910 case RISCVISD::VWSUB_W_VL:
16911 case RISCVISD::VWSUBU_W_VL:
16912 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16913 case RISCVISD::SUB_VL:
16914 case RISCVISD::MUL_VL:
16915 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16916 case RISCVISD::VFMADD_VL:
16917 case RISCVISD::VFNMADD_VL:
16918 case RISCVISD::VFMSUB_VL:
16919 case RISCVISD::VFNMSUB_VL:
16920 case RISCVISD::STRICT_VFMADD_VL:
16921 case RISCVISD::STRICT_VFNMADD_VL:
16922 case RISCVISD::STRICT_VFMSUB_VL:
16923 case RISCVISD::STRICT_VFNMSUB_VL:
16924 return performVFMADD_VLCombine(N, DCI, Subtarget);
16925 case RISCVISD::FADD_VL:
16926 case RISCVISD::FSUB_VL:
16927 case RISCVISD::FMUL_VL:
16928 case RISCVISD::VFWADD_W_VL:
16929 case RISCVISD::VFWSUB_W_VL: {
16930 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
16931 !Subtarget.hasVInstructionsF16())
16932 return SDValue();
16933 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16934 }
16935 case ISD::LOAD:
16936 case ISD::STORE: {
16937 if (DCI.isAfterLegalizeDAG())
16938 if (SDValue V = performMemPairCombine(N, DCI))
16939 return V;
16940
16941 if (N->getOpcode() != ISD::STORE)
16942 break;
16943
16944 auto *Store = cast<StoreSDNode>(N);
16945 SDValue Chain = Store->getChain();
16946 EVT MemVT = Store->getMemoryVT();
16947 SDValue Val = Store->getValue();
16948 SDLoc DL(N);
16949
16950 bool IsScalarizable =
16951 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16952 Store->isSimple() &&
16953 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16954 isPowerOf2_64(MemVT.getSizeInBits()) &&
16955 MemVT.getSizeInBits() <= Subtarget.getXLen();
16956
16957 // If sufficiently aligned we can scalarize stores of constant vectors of
16958 // any power-of-two size up to XLen bits, provided that they aren't too
16959 // expensive to materialize.
16960 // vsetivli zero, 2, e8, m1, ta, ma
16961 // vmv.v.i v8, 4
16962 // vse64.v v8, (a0)
16963 // ->
16964 // li a1, 1028
16965 // sh a1, 0(a0)
16966 if (DCI.isBeforeLegalize() && IsScalarizable &&
16967 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
16968 // Get the constant vector bits
16969 APInt NewC(Val.getValueSizeInBits(), 0);
16970 uint64_t EltSize = Val.getScalarValueSizeInBits();
16971 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16972 if (Val.getOperand(i).isUndef())
16973 continue;
16974 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16975 i * EltSize);
16976 }
16977 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16978
16979 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16980 true) <= 2 &&
16981 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16982 NewVT, *Store->getMemOperand())) {
16983 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16984 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16985 Store->getPointerInfo(), Store->getOriginalAlign(),
16986 Store->getMemOperand()->getFlags());
16987 }
16988 }
16989
16990 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16991 // vsetivli zero, 2, e16, m1, ta, ma
16992 // vle16.v v8, (a0)
16993 // vse16.v v8, (a1)
16994 if (auto *L = dyn_cast<LoadSDNode>(Val);
16995 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16996 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16997 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16998 L->getMemoryVT() == MemVT) {
16999 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17000 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17001 NewVT, *Store->getMemOperand()) &&
17002 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17003 NewVT, *L->getMemOperand())) {
17004 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
17005 L->getPointerInfo(), L->getOriginalAlign(),
17006 L->getMemOperand()->getFlags());
17007 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
17008 Store->getPointerInfo(), Store->getOriginalAlign(),
17009 Store->getMemOperand()->getFlags());
17010 }
17011 }
17012
17013 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
17014 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
17015 // any illegal types.
17016 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
17017 (DCI.isAfterLegalizeDAG() &&
17018 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17019 isNullConstant(Val.getOperand(1)))) {
17020 SDValue Src = Val.getOperand(0);
17021 MVT VecVT = Src.getSimpleValueType();
17022 // VecVT should be scalable and memory VT should match the element type.
17023 if (!Store->isIndexed() && VecVT.isScalableVector() &&
17024 MemVT == VecVT.getVectorElementType()) {
17025 SDLoc DL(N);
17026 MVT MaskVT = getMaskTypeFor(VecVT);
17027 return DAG.getStoreVP(
17028 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
17029 DAG.getConstant(1, DL, MaskVT),
17030 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
17031 Store->getMemOperand(), Store->getAddressingMode(),
17032 Store->isTruncatingStore(), /*IsCompress*/ false);
17033 }
17034 }
17035
17036 break;
17037 }
17038 case ISD::SPLAT_VECTOR: {
17039 EVT VT = N->getValueType(0);
17040 // Only perform this combine on legal MVT types.
17041 if (!isTypeLegal(VT))
17042 break;
17043 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
17044 DAG, Subtarget))
17045 return Gather;
17046 break;
17047 }
17048 case ISD::BUILD_VECTOR:
17049 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
17050 return V;
17051 break;
17052 case ISD::CONCAT_VECTORS:
17053 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
17054 return V;
17055 break;
17056 case ISD::INSERT_VECTOR_ELT:
17057 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
17058 return V;
17059 break;
17060 case RISCVISD::VFMV_V_F_VL: {
17061 const MVT VT = N->getSimpleValueType(0);
17062 SDValue Passthru = N->getOperand(0);
17063 SDValue Scalar = N->getOperand(1);
17064 SDValue VL = N->getOperand(2);
17065
17066 // If VL is 1, we can use vfmv.s.f.
17067 if (isOneConstant(VL))
17068 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
17069 break;
17070 }
17071 case RISCVISD::VMV_V_X_VL: {
17072 const MVT VT = N->getSimpleValueType(0);
17073 SDValue Passthru = N->getOperand(0);
17074 SDValue Scalar = N->getOperand(1);
17075 SDValue VL = N->getOperand(2);
17076
17077 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
17078 // scalar input.
17079 unsigned ScalarSize = Scalar.getValueSizeInBits();
17080 unsigned EltWidth = VT.getScalarSizeInBits();
17081 if (ScalarSize > EltWidth && Passthru.isUndef())
17082 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
17083 return SDValue(N, 0);
17084
17085 // If VL is 1 and the scalar value won't benefit from immediate, we can
17086 // use vmv.s.x.
17087 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17088 if (isOneConstant(VL) &&
17089 (!Const || Const->isZero() ||
17090 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
17091 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
17092
17093 break;
17094 }
17095 case RISCVISD::VFMV_S_F_VL: {
17096 SDValue Src = N->getOperand(1);
17097 // Try to remove vector->scalar->vector if the scalar->vector is inserting
17098 // into an undef vector.
17099 // TODO: Could use a vslide or vmv.v.v for non-undef.
17100 if (N->getOperand(0).isUndef() &&
17101 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17102 isNullConstant(Src.getOperand(1)) &&
17103 Src.getOperand(0).getValueType().isScalableVector()) {
17104 EVT VT = N->getValueType(0);
17105 EVT SrcVT = Src.getOperand(0).getValueType();
17106 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
17107 // Widths match, just return the original vector.
17108 if (SrcVT == VT)
17109 return Src.getOperand(0);
17110 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
17111 }
17112 [[fallthrough]];
17113 }
17114 case RISCVISD::VMV_S_X_VL: {
17115 const MVT VT = N->getSimpleValueType(0);
17116 SDValue Passthru = N->getOperand(0);
17117 SDValue Scalar = N->getOperand(1);
17118 SDValue VL = N->getOperand(2);
17119
17120 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
17121 Scalar.getOperand(0).getValueType() == N->getValueType(0))
17122 return Scalar.getOperand(0);
17123
17124 // Use M1 or smaller to avoid over constraining register allocation
17125 const MVT M1VT = getLMUL1VT(VT);
17126 if (M1VT.bitsLT(VT)) {
17127 SDValue M1Passthru =
17128 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
17129 DAG.getVectorIdxConstant(0, DL));
17130 SDValue Result =
17131 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
17132 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
17133 DAG.getVectorIdxConstant(0, DL));
17134 return Result;
17135 }
17136
17137 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
17138 // higher would involve overly constraining the register allocator for
17139 // no purpose.
17140 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17141 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17142 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17143 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17144
17145 break;
17146 }
17147 case RISCVISD::VMV_X_S: {
17148 SDValue Vec = N->getOperand(0);
17149 MVT VecVT = N->getOperand(0).getSimpleValueType();
17150 const MVT M1VT = getLMUL1VT(VecVT);
17151 if (M1VT.bitsLT(VecVT)) {
17152 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17153 DAG.getVectorIdxConstant(0, DL));
17154 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17155 }
17156 break;
17157 }
17158 case ISD::INTRINSIC_VOID:
17159 case ISD::INTRINSIC_W_CHAIN:
17160 case ISD::INTRINSIC_WO_CHAIN: {
17161 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17162 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17163 switch (IntNo) {
17164 // By default we do not combine any intrinsic.
17165 default:
17166 return SDValue();
17167 case Intrinsic::riscv_vcpop:
17168 case Intrinsic::riscv_vcpop_mask:
17169 case Intrinsic::riscv_vfirst:
17170 case Intrinsic::riscv_vfirst_mask: {
17171 SDValue VL = N->getOperand(2);
17172 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17173 IntNo == Intrinsic::riscv_vfirst_mask)
17174 VL = N->getOperand(3);
17175 if (!isNullConstant(VL))
17176 return SDValue();
17177 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17178 SDLoc DL(N);
17179 EVT VT = N->getValueType(0);
17180 if (IntNo == Intrinsic::riscv_vfirst ||
17181 IntNo == Intrinsic::riscv_vfirst_mask)
17182 return DAG.getConstant(-1, DL, VT);
17183 return DAG.getConstant(0, DL, VT);
17184 }
17185 }
17186 }
17187 case ISD::BITCAST: {
17189 SDValue N0 = N->getOperand(0);
17190 EVT VT = N->getValueType(0);
17191 EVT SrcVT = N0.getValueType();
17192 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17193 // type, widen both sides to avoid a trip through memory.
17194 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17195 VT.isScalarInteger()) {
17196 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17197 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17198 Ops[0] = N0;
17199 SDLoc DL(N);
17200 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17201 N0 = DAG.getBitcast(MVT::i8, N0);
17202 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17203 }
17204
17205 return SDValue();
17206 }
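  // Illustrative walk-through of the widening above: a bitcast of v2i1 to the
  // illegal type i2 becomes concat_vectors(x, undef, undef, undef) -> v8i1,
  // a bitcast to i8, and a truncate back to i2, so the mask never has to take
  // a round trip through memory.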
17207 }
17208
17209 return SDValue();
17210}
17211
17212bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
17213 EVT XVT, unsigned KeptBits) const {
17214  // For vectors, we don't have a preference.
17215 if (XVT.isVector())
17216 return false;
17217
17218 if (XVT != MVT::i32 && XVT != MVT::i64)
17219 return false;
17220
17221 // We can use sext.w for RV64 or an srai 31 on RV32.
17222 if (KeptBits == 32 || KeptBits == 64)
17223 return true;
17224
17225 // With Zbb we can use sext.h/sext.b.
17226 return Subtarget.hasStdExtZbb() &&
17227 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17228 KeptBits == 16);
17229}
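// Illustrative example: with Zbb, a signed-truncation check that a value fits
// in 16 bits (KeptBits == 16) can be lowered as
//   sext.h a1, a0
//   bne    a0, a1, <out of range>
// instead of masking and comparing, which is why true is returned above.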
17230
17231bool RISCVTargetLowering::isDesirableToCommuteWithShift(
17232 const SDNode *N, CombineLevel Level) const {
17233 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17234 N->getOpcode() == ISD::SRL) &&
17235 "Expected shift op");
17236
17237 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17238 // materialised in fewer instructions than `(OP _, c1)`:
17239 //
17240 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17241 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
17242 SDValue N0 = N->getOperand(0);
17243 EVT Ty = N0.getValueType();
17244 if (Ty.isScalarInteger() &&
17245 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17246 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17247 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17248 if (C1 && C2) {
17249 const APInt &C1Int = C1->getAPIntValue();
17250 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17251
17252 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17253 // and the combine should happen, to potentially allow further combines
17254 // later.
17255 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17256 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17257 return true;
17258
17259 // We can materialise `c1` in an add immediate, so it's "free", and the
17260 // combine should be prevented.
17261 if (C1Int.getSignificantBits() <= 64 &&
17262          isLegalAddImmediate(C1Int.getSExtValue()))
17263 return false;
17264
17265 // Neither constant will fit into an immediate, so find materialisation
17266 // costs.
17267 int C1Cost =
17268 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17269 /*CompressionCost*/ true);
17270 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17271 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17272 /*CompressionCost*/ true);
17273
17274 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17275 // combine should be prevented.
17276 if (C1Cost < ShiftedC1Cost)
17277 return false;
17278 }
17279 }
17280 return true;
17281}
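// Rough worked example of the cost reasoning above: for
//   (shl (add x, 1), 3) -> (add (shl x, 3), 8)
// the shifted constant 8 still fits an ADDI immediate, so the fold is free and
// allowed. For (shl (add x, 100), 20), 100 fits simm12 but 100 << 20 does not,
// so the fold is rejected to keep the cheap ADDI.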
17282
17283bool RISCVTargetLowering::targetShrinkDemandedConstant(
17284 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17285 TargetLoweringOpt &TLO) const {
17286 // Delay this optimization as late as possible.
17287 if (!TLO.LegalOps)
17288 return false;
17289
17290 EVT VT = Op.getValueType();
17291 if (VT.isVector())
17292 return false;
17293
17294 unsigned Opcode = Op.getOpcode();
17295 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17296 return false;
17297
17298 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17299 if (!C)
17300 return false;
17301
17302 const APInt &Mask = C->getAPIntValue();
17303
17304 // Clear all non-demanded bits initially.
17305 APInt ShrunkMask = Mask & DemandedBits;
17306
17307 // Try to make a smaller immediate by setting undemanded bits.
17308
17309 APInt ExpandedMask = Mask | ~DemandedBits;
17310
17311 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17312 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17313 };
17314 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17315 if (NewMask == Mask)
17316 return true;
17317 SDLoc DL(Op);
17318 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17319 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17320 Op.getOperand(0), NewC);
17321 return TLO.CombineTo(Op, NewOp);
17322 };
17323
17324 // If the shrunk mask fits in sign extended 12 bits, let the target
17325 // independent code apply it.
17326 if (ShrunkMask.isSignedIntN(12))
17327 return false;
17328
17329  // AND has a few special cases for zext.
17330 if (Opcode == ISD::AND) {
17331 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17332 // otherwise use SLLI + SRLI.
17333 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17334 if (IsLegalMask(NewMask))
17335 return UseMask(NewMask);
17336
17337 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17338 if (VT == MVT::i64) {
17339 APInt NewMask = APInt(64, 0xffffffff);
17340 if (IsLegalMask(NewMask))
17341 return UseMask(NewMask);
17342 }
17343 }
17344
17345 // For the remaining optimizations, we need to be able to make a negative
17346 // number through a combination of mask and undemanded bits.
17347 if (!ExpandedMask.isNegative())
17348 return false;
17349
17350 // What is the fewest number of bits we need to represent the negative number.
17351 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17352
17353 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17354 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17355 // If we can't create a simm12, we shouldn't change opaque constants.
17356 APInt NewMask = ShrunkMask;
17357 if (MinSignedBits <= 12)
17358 NewMask.setBitsFrom(11);
17359 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17360 NewMask.setBitsFrom(31);
17361 else
17362 return false;
17363
17364 // Check that our new mask is a subset of the demanded mask.
17365 assert(IsLegalMask(NewMask));
17366 return UseMask(NewMask);
17367}
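// Worked example (illustrative): for (and X, 0xABCDFFFF) on RV64 where only
// the low 16 bits are demanded, ShrunkMask is 0xFFFF and ExpandedMask is all
// ones, so the 0xFFFF special case fires and the node is rewritten to
// (and X, 0xFFFF), i.e. zext.h (or SLLI+SRLI without Zbb).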
17368
17369static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17370 static const uint64_t GREVMasks[] = {
17371 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17372 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17373
17374 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17375 unsigned Shift = 1 << Stage;
17376 if (ShAmt & Shift) {
17377 uint64_t Mask = GREVMasks[Stage];
17378 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17379 if (IsGORC)
17380 Res |= x;
17381 x = Res;
17382 }
17383 }
17384
17385 return x;
17386}
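// Quick sanity examples for the control value 7 used by the callers below
// (only the 1/2/4-bit stages, i.e. swaps confined to each byte):
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80   (brev8: bit 0 -> bit 7)
//   computeGREVOrGORC(0x100, 7, /*IsGORC=*/true) == 0xFF00 (orc.b: a set bit
//                                                           fills its byte)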
17387
17388void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17389 KnownBits &Known,
17390 const APInt &DemandedElts,
17391 const SelectionDAG &DAG,
17392 unsigned Depth) const {
17393 unsigned BitWidth = Known.getBitWidth();
17394 unsigned Opc = Op.getOpcode();
17395 assert((Opc >= ISD::BUILTIN_OP_END ||
17396 Opc == ISD::INTRINSIC_WO_CHAIN ||
17397 Opc == ISD::INTRINSIC_W_CHAIN ||
17398 Opc == ISD::INTRINSIC_VOID) &&
17399 "Should use MaskedValueIsZero if you don't know whether Op"
17400 " is a target node!");
17401
17402 Known.resetAll();
17403 switch (Opc) {
17404 default: break;
17405 case RISCVISD::SELECT_CC: {
17406 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17407 // If we don't know any bits, early out.
17408 if (Known.isUnknown())
17409 break;
17410 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17411
17412 // Only known if known in both the LHS and RHS.
17413 Known = Known.intersectWith(Known2);
17414 break;
17415 }
17416  case RISCVISD::CZERO_EQZ:
17417  case RISCVISD::CZERO_NEZ:
17418 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17419 // Result is either all zero or operand 0. We can propagate zeros, but not
17420 // ones.
17421 Known.One.clearAllBits();
17422 break;
17423 case RISCVISD::REMUW: {
17424 KnownBits Known2;
17425 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17426 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17427 // We only care about the lower 32 bits.
17428 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17429 // Restore the original width by sign extending.
17430 Known = Known.sext(BitWidth);
17431 break;
17432 }
17433 case RISCVISD::DIVUW: {
17434 KnownBits Known2;
17435 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17436 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17437 // We only care about the lower 32 bits.
17438 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17439 // Restore the original width by sign extending.
17440 Known = Known.sext(BitWidth);
17441 break;
17442 }
17443 case RISCVISD::SLLW: {
17444 KnownBits Known2;
17445 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17446 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17447 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17448 // Restore the original width by sign extending.
17449 Known = Known.sext(BitWidth);
17450 break;
17451 }
17452 case RISCVISD::CTZW: {
17453 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17454 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17455 unsigned LowBits = llvm::bit_width(PossibleTZ);
17456 Known.Zero.setBitsFrom(LowBits);
17457 break;
17458 }
17459 case RISCVISD::CLZW: {
17460 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17461 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17462 unsigned LowBits = llvm::bit_width(PossibleLZ);
17463 Known.Zero.setBitsFrom(LowBits);
17464 break;
17465 }
17466 case RISCVISD::BREV8:
17467 case RISCVISD::ORC_B: {
17468 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17469 // control value of 7 is equivalent to brev8 and orc.b.
17470 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17471 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17472 // To compute zeros, we need to invert the value and invert it back after.
17473 Known.Zero =
17474 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17475 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17476 break;
17477 }
17478 case RISCVISD::READ_VLENB: {
17479 // We can use the minimum and maximum VLEN values to bound VLENB. We
17480 // know VLEN must be a power of two.
17481 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17482 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17483 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17484 Known.Zero.setLowBits(Log2_32(MinVLenB));
17485 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17486 if (MaxVLenB == MinVLenB)
17487 Known.One.setBit(Log2_32(MinVLenB));
17488 break;
17489 }
17490 case RISCVISD::FCLASS: {
17491 // fclass will only set one of the low 10 bits.
17492 Known.Zero.setBitsFrom(10);
17493 break;
17494 }
17495  case ISD::INTRINSIC_W_CHAIN:
17496  case ISD::INTRINSIC_WO_CHAIN: {
17497 unsigned IntNo =
17498 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17499 switch (IntNo) {
17500 default:
17501 // We can't do anything for most intrinsics.
17502 break;
17503 case Intrinsic::riscv_vsetvli:
17504 case Intrinsic::riscv_vsetvlimax: {
17505 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17506 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17507 RISCVII::VLMUL VLMUL =
17508 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17509 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17510 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17511 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17512 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
17513
17514      // The result of vsetvli must not be larger than AVL.
17515 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17516 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17517
17518 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17519 if (BitWidth > KnownZeroFirstBit)
17520 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17521 break;
17522 }
17523 }
17524 break;
17525 }
17526 }
17527}
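// Illustrative numbers for the bounds above: with a guaranteed VLEN range of
// 128..512 bits, VLENB is a power of two in [16, 64], so READ_VLENB has bits
// [0,3] and bits >= 7 known zero; if VLEN is known exactly (128), bit 4 is
// also known one. Likewise, vsetvli with SEW=32 and LMUL=2 on a VLEN<=512
// machine yields VL <= 32, so bits >= 6 of the result are known zero.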
17528
17529unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17530 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17531 unsigned Depth) const {
17532 switch (Op.getOpcode()) {
17533 default:
17534 break;
17535 case RISCVISD::SELECT_CC: {
17536 unsigned Tmp =
17537 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17538 if (Tmp == 1) return 1; // Early out.
17539 unsigned Tmp2 =
17540 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17541 return std::min(Tmp, Tmp2);
17542 }
17543  case RISCVISD::CZERO_EQZ:
17544  case RISCVISD::CZERO_NEZ:
17545 // Output is either all zero or operand 0. We can propagate sign bit count
17546 // from operand 0.
17547 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17548 case RISCVISD::ABSW: {
17549 // We expand this at isel to negw+max. The result will have 33 sign bits
17550 // if the input has at least 33 sign bits.
17551 unsigned Tmp =
17552 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17553 if (Tmp < 33) return 1;
17554 return 33;
17555 }
17556 case RISCVISD::SLLW:
17557 case RISCVISD::SRAW:
17558 case RISCVISD::SRLW:
17559 case RISCVISD::DIVW:
17560 case RISCVISD::DIVUW:
17561 case RISCVISD::REMUW:
17562 case RISCVISD::ROLW:
17563 case RISCVISD::RORW:
17564  case RISCVISD::FCVT_W_RV64:
17565  case RISCVISD::FCVT_WU_RV64:
17566  case RISCVISD::STRICT_FCVT_W_RV64:
17567  case RISCVISD::STRICT_FCVT_WU_RV64:
17568 // TODO: As the result is sign-extended, this is conservatively correct. A
17569 // more precise answer could be calculated for SRAW depending on known
17570 // bits in the shift amount.
17571 return 33;
17572 case RISCVISD::VMV_X_S: {
17573 // The number of sign bits of the scalar result is computed by obtaining the
17574 // element type of the input vector operand, subtracting its width from the
17575 // XLEN, and then adding one (sign bit within the element type). If the
17576 // element type is wider than XLen, the least-significant XLEN bits are
17577 // taken.
17578 unsigned XLen = Subtarget.getXLen();
17579 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17580 if (EltBits <= XLen)
17581 return XLen - EltBits + 1;
17582 break;
17583 }
17584  case ISD::INTRINSIC_W_CHAIN: {
17585 unsigned IntNo = Op.getConstantOperandVal(1);
17586 switch (IntNo) {
17587 default:
17588 break;
17589 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17590 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17591 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17592 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17593 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17594 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17595 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17596 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17597 case Intrinsic::riscv_masked_cmpxchg_i64:
17598 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17599 // narrow atomic operation. These are implemented using atomic
17600 // operations at the minimum supported atomicrmw/cmpxchg width whose
17601 // result is then sign extended to XLEN. With +A, the minimum width is
17602      // 32 for both RV64 and RV32.
17603 assert(Subtarget.getXLen() == 64);
17604      assert(getMinCmpXchgSizeInBits() == 32);
17605 assert(Subtarget.hasStdExtA());
17606 return 33;
17607 }
17608 break;
17609 }
17610 }
17611
17612 return 1;
17613}
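// Example for the VMV_X_S rule above: extracting an i8 element into a 64-bit
// GPR gives 64 - 8 + 1 = 57 known sign bits, because vmv.x.s sign-extends the
// element to XLEN.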
17614
17615bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17616 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17617 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17618
17619 // TODO: Add more target nodes.
17620 switch (Op.getOpcode()) {
17621  case RISCVISD::SELECT_CC:
17622 // Integer select_cc cannot create poison.
17623 // TODO: What are the FP poison semantics?
17624 // TODO: This instruction blocks poison from the unselected operand, can
17625 // we do anything with that?
17626 return !Op.getValueType().isInteger();
17627 }
17628  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17629 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17630}
17631
17632const Constant *
17633RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17634 assert(Ld && "Unexpected null LoadSDNode");
17635 if (!ISD::isNormalLoad(Ld))
17636 return nullptr;
17637
17638 SDValue Ptr = Ld->getBasePtr();
17639
17640 // Only constant pools with no offset are supported.
17641 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17642 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17643 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17644 CNode->getOffset() != 0)
17645 return nullptr;
17646
17647 return CNode;
17648 };
17649
17650 // Simple case, LLA.
17651 if (Ptr.getOpcode() == RISCVISD::LLA) {
17652 auto *CNode = GetSupportedConstantPool(Ptr);
17653 if (!CNode || CNode->getTargetFlags() != 0)
17654 return nullptr;
17655
17656 return CNode->getConstVal();
17657 }
17658
17659 // Look for a HI and ADD_LO pair.
17660 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17661 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17662 return nullptr;
17663
17664 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17665 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17666
17667 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17668 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17669 return nullptr;
17670
17671 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17672 return nullptr;
17673
17674 return CNodeLo->getConstVal();
17675}
17676
17677static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17678 MachineBasicBlock *BB) {
17679 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17680
17681 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17682 // Should the count have wrapped while it was being read, we need to try
17683 // again.
17684 // For example:
17685 // ```
17686 // read:
17687 // csrrs x3, counterh # load high word of counter
17688 // csrrs x2, counter # load low word of counter
17689 // csrrs x4, counterh # load high word of counter
17690 // bne x3, x4, read # check if high word reads match, otherwise try again
17691 // ```
17692
17693 MachineFunction &MF = *BB->getParent();
17694 const BasicBlock *LLVMBB = BB->getBasicBlock();
17695  MachineFunction::iterator It = ++BB->getIterator();
17696
17697 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17698 MF.insert(It, LoopMBB);
17699
17700 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17701 MF.insert(It, DoneMBB);
17702
17703 // Transfer the remainder of BB and its successor edges to DoneMBB.
17704 DoneMBB->splice(DoneMBB->begin(), BB,
17705 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17706  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17707
17708 BB->addSuccessor(LoopMBB);
17709
17710  MachineRegisterInfo &RegInfo = MF.getRegInfo();
17711 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17712 Register LoReg = MI.getOperand(0).getReg();
17713 Register HiReg = MI.getOperand(1).getReg();
17714 int64_t LoCounter = MI.getOperand(2).getImm();
17715 int64_t HiCounter = MI.getOperand(3).getImm();
17716 DebugLoc DL = MI.getDebugLoc();
17717
17718  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17719 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17720 .addImm(HiCounter)
17721 .addReg(RISCV::X0);
17722 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17723 .addImm(LoCounter)
17724 .addReg(RISCV::X0);
17725 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17726 .addImm(HiCounter)
17727 .addReg(RISCV::X0);
17728
17729 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17730 .addReg(HiReg)
17731 .addReg(ReadAgainReg)
17732 .addMBB(LoopMBB);
17733
17734 LoopMBB->addSuccessor(LoopMBB);
17735 LoopMBB->addSuccessor(DoneMBB);
17736
17737 MI.eraseFromParent();
17738
17739 return DoneMBB;
17740}
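// At the C level the loop built above is roughly (a sketch, not the literal
// expansion):
//   do {
//     hi    = csr_read(counterh);
//     lo    = csr_read(counter);
//     again = csr_read(counterh);
//   } while (hi != again);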
17741
17742static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17743                                             MachineBasicBlock *BB,
17744 const RISCVSubtarget &Subtarget) {
17745 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17746
17747 MachineFunction &MF = *BB->getParent();
17748 DebugLoc DL = MI.getDebugLoc();
17749  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17750  const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17751 Register LoReg = MI.getOperand(0).getReg();
17752 Register HiReg = MI.getOperand(1).getReg();
17753 Register SrcReg = MI.getOperand(2).getReg();
17754
17755 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17756 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17757
17758 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17759 RI, Register());
17760  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17761 MachineMemOperand *MMOLo =
17762      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
17763  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17764      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
17765 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17766 .addFrameIndex(FI)
17767 .addImm(0)
17768 .addMemOperand(MMOLo);
17769 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17770 .addFrameIndex(FI)
17771 .addImm(4)
17772 .addMemOperand(MMOHi);
17773 MI.eraseFromParent(); // The pseudo instruction is gone now.
17774 return BB;
17775}
17776
17777static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17778                                                 MachineBasicBlock *BB,
17779 const RISCVSubtarget &Subtarget) {
17780 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17781 "Unexpected instruction");
17782
17783 MachineFunction &MF = *BB->getParent();
17784 DebugLoc DL = MI.getDebugLoc();
17785  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17786  const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17787 Register DstReg = MI.getOperand(0).getReg();
17788 Register LoReg = MI.getOperand(1).getReg();
17789 Register HiReg = MI.getOperand(2).getReg();
17790
17791 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17792 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17793
17794  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17795 MachineMemOperand *MMOLo =
17796      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
17797  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17798      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
17799 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17800 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17801 .addFrameIndex(FI)
17802 .addImm(0)
17803 .addMemOperand(MMOLo);
17804 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17805 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17806 .addFrameIndex(FI)
17807 .addImm(4)
17808 .addMemOperand(MMOHi);
17809 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17810 MI.eraseFromParent(); // The pseudo instruction is gone now.
17811 return BB;
17812}
17813
17815 switch (MI.getOpcode()) {
17816 default:
17817 return false;
17818 case RISCV::Select_GPR_Using_CC_GPR:
17819 case RISCV::Select_GPR_Using_CC_Imm:
17820 case RISCV::Select_FPR16_Using_CC_GPR:
17821 case RISCV::Select_FPR16INX_Using_CC_GPR:
17822 case RISCV::Select_FPR32_Using_CC_GPR:
17823 case RISCV::Select_FPR32INX_Using_CC_GPR:
17824 case RISCV::Select_FPR64_Using_CC_GPR:
17825 case RISCV::Select_FPR64INX_Using_CC_GPR:
17826 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17827 return true;
17828 }
17829}
17830
17831static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17832 unsigned RelOpcode, unsigned EqOpcode,
17833 const RISCVSubtarget &Subtarget) {
17834 DebugLoc DL = MI.getDebugLoc();
17835 Register DstReg = MI.getOperand(0).getReg();
17836 Register Src1Reg = MI.getOperand(1).getReg();
17837 Register Src2Reg = MI.getOperand(2).getReg();
17838  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17839 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17840  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17841
17842 // Save the current FFLAGS.
17843 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17844
17845 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17846 .addReg(Src1Reg)
17847 .addReg(Src2Reg);
17848  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17849    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17850
17851 // Restore the FFLAGS.
17852 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17853 .addReg(SavedFFlags, RegState::Kill);
17854
17855 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17856 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17857 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17858 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17859  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17860    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
17861
17862 // Erase the pseudoinstruction.
17863 MI.eraseFromParent();
17864 return BB;
17865}
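// For PseudoQuietFLE_S the sequence built above is roughly:
//   frflags t0              # save FFLAGS
//   fle.s   a0, fa0, fa1    # signaling compare computes the result
//   fsflags t0              # drop NV raised for quiet NaN operands
//   feq.s   zero, fa0, fa1  # quiet compare: raises NV only for signaling NaNs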
17866
17867static MachineBasicBlock *
17868EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17869 MachineBasicBlock *ThisMBB,
17870 const RISCVSubtarget &Subtarget) {
17871  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17872 // Without this, custom-inserter would have generated:
17873 //
17874 // A
17875 // | \
17876 // | B
17877 // | /
17878 // C
17879 // | \
17880 // | D
17881 // | /
17882 // E
17883 //
17884 // A: X = ...; Y = ...
17885 // B: empty
17886 // C: Z = PHI [X, A], [Y, B]
17887 // D: empty
17888 // E: PHI [X, C], [Z, D]
17889 //
17890 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17891 //
17892 // A
17893 // | \
17894 // | C
17895 // | /|
17896 // |/ |
17897 // | |
17898 // | D
17899 // | /
17900 // E
17901 //
17902 // A: X = ...; Y = ...
17903 // D: empty
17904 // E: PHI [X, A], [X, C], [Y, D]
17905
17906 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17907 const DebugLoc &DL = First.getDebugLoc();
17908 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17909 MachineFunction *F = ThisMBB->getParent();
17910 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17911 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17912 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17913 MachineFunction::iterator It = ++ThisMBB->getIterator();
17914 F->insert(It, FirstMBB);
17915 F->insert(It, SecondMBB);
17916 F->insert(It, SinkMBB);
17917
17918 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17919 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17920                  std::next(MachineBasicBlock::iterator(First)),
17921 ThisMBB->end());
17922 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17923
17924 // Fallthrough block for ThisMBB.
17925 ThisMBB->addSuccessor(FirstMBB);
17926 // Fallthrough block for FirstMBB.
17927 FirstMBB->addSuccessor(SecondMBB);
17928 ThisMBB->addSuccessor(SinkMBB);
17929 FirstMBB->addSuccessor(SinkMBB);
17930 // This is fallthrough.
17931 SecondMBB->addSuccessor(SinkMBB);
17932
17933 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17934 Register FLHS = First.getOperand(1).getReg();
17935 Register FRHS = First.getOperand(2).getReg();
17936 // Insert appropriate branch.
17937 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17938 .addReg(FLHS)
17939 .addReg(FRHS)
17940 .addMBB(SinkMBB);
17941
17942 Register SLHS = Second.getOperand(1).getReg();
17943 Register SRHS = Second.getOperand(2).getReg();
17944 Register Op1Reg4 = First.getOperand(4).getReg();
17945 Register Op1Reg5 = First.getOperand(5).getReg();
17946
17947 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17948 // Insert appropriate branch.
17949 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17950 .addReg(SLHS)
17951 .addReg(SRHS)
17952 .addMBB(SinkMBB);
17953
17954 Register DestReg = Second.getOperand(0).getReg();
17955 Register Op2Reg4 = Second.getOperand(4).getReg();
17956 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17957 .addReg(Op2Reg4)
17958 .addMBB(ThisMBB)
17959 .addReg(Op1Reg4)
17960 .addMBB(FirstMBB)
17961 .addReg(Op1Reg5)
17962 .addMBB(SecondMBB);
17963
17964 // Now remove the Select_FPRX_s.
17965 First.eraseFromParent();
17966 Second.eraseFromParent();
17967 return SinkMBB;
17968}
17969
17970static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17971                                           MachineBasicBlock *BB,
17972 const RISCVSubtarget &Subtarget) {
17973 // To "insert" Select_* instructions, we actually have to insert the triangle
17974 // control-flow pattern. The incoming instructions know the destination vreg
17975 // to set, the condition code register to branch on, the true/false values to
17976 // select between, and the condcode to use to select the appropriate branch.
17977 //
17978 // We produce the following control flow:
17979 // HeadMBB
17980 // | \
17981 // | IfFalseMBB
17982 // | /
17983 // TailMBB
17984 //
17985 // When we find a sequence of selects we attempt to optimize their emission
17986 // by sharing the control flow. Currently we only handle cases where we have
17987 // multiple selects with the exact same condition (same LHS, RHS and CC).
17988 // The selects may be interleaved with other instructions if the other
17989 // instructions meet some requirements we deem safe:
17990 // - They are not pseudo instructions.
17991 // - They are debug instructions. Otherwise,
17992 // - They do not have side-effects, do not access memory and their inputs do
17993 // not depend on the results of the select pseudo-instructions.
17994 // The TrueV/FalseV operands of the selects cannot depend on the result of
17995 // previous selects in the sequence.
17996 // These conditions could be further relaxed. See the X86 target for a
17997 // related approach and more information.
17998 //
17999 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18000 // is checked here and handled by a separate function -
18001 // EmitLoweredCascadedSelect.
18002
18003 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
18004 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
18005 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
18006 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
18007 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
18008 Next->getOperand(5).isKill())
18009 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
18010
18011 Register LHS = MI.getOperand(1).getReg();
18012 Register RHS;
18013 if (MI.getOperand(2).isReg())
18014 RHS = MI.getOperand(2).getReg();
18015 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
18016
18017 SmallVector<MachineInstr *, 4> SelectDebugValues;
18018 SmallSet<Register, 4> SelectDests;
18019 SelectDests.insert(MI.getOperand(0).getReg());
18020
18021 MachineInstr *LastSelectPseudo = &MI;
18022 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
18023 SequenceMBBI != E; ++SequenceMBBI) {
18024 if (SequenceMBBI->isDebugInstr())
18025 continue;
18026 if (isSelectPseudo(*SequenceMBBI)) {
18027 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
18028 !SequenceMBBI->getOperand(2).isReg() ||
18029 SequenceMBBI->getOperand(2).getReg() != RHS ||
18030 SequenceMBBI->getOperand(3).getImm() != CC ||
18031 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
18032 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
18033 break;
18034 LastSelectPseudo = &*SequenceMBBI;
18035 SequenceMBBI->collectDebugValues(SelectDebugValues);
18036 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
18037 continue;
18038 }
18039 if (SequenceMBBI->hasUnmodeledSideEffects() ||
18040 SequenceMBBI->mayLoadOrStore() ||
18041 SequenceMBBI->usesCustomInsertionHook())
18042 break;
18043 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
18044 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
18045 }))
18046 break;
18047 }
18048
18049 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18050 const BasicBlock *LLVM_BB = BB->getBasicBlock();
18051 DebugLoc DL = MI.getDebugLoc();
18052  MachineFunction::iterator I = ++BB->getIterator();
18053
18054 MachineBasicBlock *HeadMBB = BB;
18055 MachineFunction *F = BB->getParent();
18056 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
18057 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
18058
18059 F->insert(I, IfFalseMBB);
18060 F->insert(I, TailMBB);
18061
18062 // Set the call frame size on entry to the new basic blocks.
18063 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
18064 IfFalseMBB->setCallFrameSize(CallFrameSize);
18065 TailMBB->setCallFrameSize(CallFrameSize);
18066
18067 // Transfer debug instructions associated with the selects to TailMBB.
18068 for (MachineInstr *DebugInstr : SelectDebugValues) {
18069 TailMBB->push_back(DebugInstr->removeFromParent());
18070 }
18071
18072 // Move all instructions after the sequence to TailMBB.
18073 TailMBB->splice(TailMBB->end(), HeadMBB,
18074 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
18075 // Update machine-CFG edges by transferring all successors of the current
18076 // block to the new block which will contain the Phi nodes for the selects.
18077 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
18078 // Set the successors for HeadMBB.
18079 HeadMBB->addSuccessor(IfFalseMBB);
18080 HeadMBB->addSuccessor(TailMBB);
18081
18082 // Insert appropriate branch.
18083 if (MI.getOperand(2).isImm())
18084 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
18085 .addReg(LHS)
18086 .addImm(MI.getOperand(2).getImm())
18087 .addMBB(TailMBB);
18088 else
18089 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
18090 .addReg(LHS)
18091 .addReg(RHS)
18092 .addMBB(TailMBB);
18093
18094 // IfFalseMBB just falls through to TailMBB.
18095 IfFalseMBB->addSuccessor(TailMBB);
18096
18097 // Create PHIs for all of the select pseudo-instructions.
18098 auto SelectMBBI = MI.getIterator();
18099 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
18100 auto InsertionPoint = TailMBB->begin();
18101 while (SelectMBBI != SelectEnd) {
18102 auto Next = std::next(SelectMBBI);
18103 if (isSelectPseudo(*SelectMBBI)) {
18104 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
18105 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
18106 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
18107 .addReg(SelectMBBI->getOperand(4).getReg())
18108 .addMBB(HeadMBB)
18109 .addReg(SelectMBBI->getOperand(5).getReg())
18110 .addMBB(IfFalseMBB);
18111 SelectMBBI->eraseFromParent();
18112 }
18113 SelectMBBI = Next;
18114 }
18115
18116 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
18117 return TailMBB;
18118}
18119
18120// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
18121static const RISCV::RISCVMaskedPseudoInfo *
18122lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
18123  const RISCVVInversePseudosTable::PseudoInfo *Inverse =
18124 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
18125 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
18126  const RISCV::RISCVMaskedPseudoInfo *Masked =
18127 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
18128 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
18129 return Masked;
18130}
18131
18132static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
18133                                                    MachineBasicBlock *BB,
18134 unsigned CVTXOpc) {
18135 DebugLoc DL = MI.getDebugLoc();
18136
18137  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
18138
18139  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
18140 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18141
18142 // Save the old value of FFLAGS.
18143 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
18144
18145 assert(MI.getNumOperands() == 7);
18146
18147 // Emit a VFCVT_X_F
18148 const TargetRegisterInfo *TRI =
18149      BB->getParent()->getSubtarget().getRegisterInfo();
18150 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
18151 Register Tmp = MRI.createVirtualRegister(RC);
18152 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
18153 .add(MI.getOperand(1))
18154 .add(MI.getOperand(2))
18155 .add(MI.getOperand(3))
18156 .add(MachineOperand::CreateImm(7)) // frm = DYN
18157 .add(MI.getOperand(4))
18158 .add(MI.getOperand(5))
18159 .add(MI.getOperand(6))
18160 .add(MachineOperand::CreateReg(RISCV::FRM,
18161 /*IsDef*/ false,
18162 /*IsImp*/ true));
18163
18164 // Emit a VFCVT_F_X
18165 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
18166 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
18167 // There is no E8 variant for VFCVT_F_X.
18168 assert(Log2SEW >= 4);
18169 unsigned CVTFOpc =
18170 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
18171 ->MaskedPseudo;
18172
18173 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
18174 .add(MI.getOperand(0))
18175 .add(MI.getOperand(1))
18176 .addReg(Tmp)
18177 .add(MI.getOperand(3))
18178 .add(MachineOperand::CreateImm(7)) // frm = DYN
18179 .add(MI.getOperand(4))
18180 .add(MI.getOperand(5))
18181 .add(MI.getOperand(6))
18182 .add(MachineOperand::CreateReg(RISCV::FRM,
18183 /*IsDef*/ false,
18184 /*IsImp*/ true));
18185
18186 // Restore FFLAGS.
18187 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18188 .addReg(SavedFFLAGS, RegState::Kill);
18189
18190 // Erase the pseudoinstruction.
18191 MI.eraseFromParent();
18192 return BB;
18193}
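// Net effect, sketched: FFLAGS are saved, the masked vfcvt.x.f.v / vfcvt.f.x.v
// round trip rounds each active element to an integral value using the dynamic
// rounding mode, and FFLAGS are then restored so the inexact flag raised by the
// conversions is not observable.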
18194
18195static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
18196 const RISCVSubtarget &Subtarget) {
18197 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
18198 const TargetRegisterClass *RC;
18199 switch (MI.getOpcode()) {
18200 default:
18201 llvm_unreachable("Unexpected opcode");
18202 case RISCV::PseudoFROUND_H:
18203 CmpOpc = RISCV::FLT_H;
18204 F2IOpc = RISCV::FCVT_W_H;
18205 I2FOpc = RISCV::FCVT_H_W;
18206 FSGNJOpc = RISCV::FSGNJ_H;
18207 FSGNJXOpc = RISCV::FSGNJX_H;
18208 RC = &RISCV::FPR16RegClass;
18209 break;
18210 case RISCV::PseudoFROUND_H_INX:
18211 CmpOpc = RISCV::FLT_H_INX;
18212 F2IOpc = RISCV::FCVT_W_H_INX;
18213 I2FOpc = RISCV::FCVT_H_W_INX;
18214 FSGNJOpc = RISCV::FSGNJ_H_INX;
18215 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18216 RC = &RISCV::GPRF16RegClass;
18217 break;
18218 case RISCV::PseudoFROUND_S:
18219 CmpOpc = RISCV::FLT_S;
18220 F2IOpc = RISCV::FCVT_W_S;
18221 I2FOpc = RISCV::FCVT_S_W;
18222 FSGNJOpc = RISCV::FSGNJ_S;
18223 FSGNJXOpc = RISCV::FSGNJX_S;
18224 RC = &RISCV::FPR32RegClass;
18225 break;
18226 case RISCV::PseudoFROUND_S_INX:
18227 CmpOpc = RISCV::FLT_S_INX;
18228 F2IOpc = RISCV::FCVT_W_S_INX;
18229 I2FOpc = RISCV::FCVT_S_W_INX;
18230 FSGNJOpc = RISCV::FSGNJ_S_INX;
18231 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18232 RC = &RISCV::GPRF32RegClass;
18233 break;
18234 case RISCV::PseudoFROUND_D:
18235 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18236 CmpOpc = RISCV::FLT_D;
18237 F2IOpc = RISCV::FCVT_L_D;
18238 I2FOpc = RISCV::FCVT_D_L;
18239 FSGNJOpc = RISCV::FSGNJ_D;
18240 FSGNJXOpc = RISCV::FSGNJX_D;
18241 RC = &RISCV::FPR64RegClass;
18242 break;
18243 case RISCV::PseudoFROUND_D_INX:
18244 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18245 CmpOpc = RISCV::FLT_D_INX;
18246 F2IOpc = RISCV::FCVT_L_D_INX;
18247 I2FOpc = RISCV::FCVT_D_L_INX;
18248 FSGNJOpc = RISCV::FSGNJ_D_INX;
18249 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18250 RC = &RISCV::GPRRegClass;
18251 break;
18252 }
18253
18254 const BasicBlock *BB = MBB->getBasicBlock();
18255 DebugLoc DL = MI.getDebugLoc();
18256  MachineFunction::iterator I = ++MBB->getIterator();
18257
18258  MachineFunction *F = MBB->getParent();
18259 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18260 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18261
18262 F->insert(I, CvtMBB);
18263 F->insert(I, DoneMBB);
18264 // Move all instructions after the sequence to DoneMBB.
18265 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18266 MBB->end());
18267 // Update machine-CFG edges by transferring all successors of the current
18268 // block to the new block which will contain the Phi nodes for the selects.
18270 // Set the successors for MBB.
18271 MBB->addSuccessor(CvtMBB);
18272 MBB->addSuccessor(DoneMBB);
18273
18274 Register DstReg = MI.getOperand(0).getReg();
18275 Register SrcReg = MI.getOperand(1).getReg();
18276 Register MaxReg = MI.getOperand(2).getReg();
18277 int64_t FRM = MI.getOperand(3).getImm();
18278
18279 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18280  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
18281
18282 Register FabsReg = MRI.createVirtualRegister(RC);
18283 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18284
18285 // Compare the FP value to the max value.
18286 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18287 auto MIB =
18288 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18289  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18290    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18291
18292 // Insert branch.
18293 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18294 .addReg(CmpReg)
18295 .addReg(RISCV::X0)
18296 .addMBB(DoneMBB);
18297
18298 CvtMBB->addSuccessor(DoneMBB);
18299
18300 // Convert to integer.
18301 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18302 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18303  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18304    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18305
18306 // Convert back to FP.
18307 Register I2FReg = MRI.createVirtualRegister(RC);
18308 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18309  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18310    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18311
18312 // Restore the sign bit.
18313 Register CvtReg = MRI.createVirtualRegister(RC);
18314 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18315
18316 // Merge the results.
18317 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18318 .addReg(SrcReg)
18319 .addMBB(MBB)
18320 .addReg(CvtReg)
18321 .addMBB(CvtMBB);
18322
18323 MI.eraseFromParent();
18324 return DoneMBB;
18325}
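// A scalar-level sketch of the control flow built above:
//   if (fabs(src) < MAX)   // MAX is the smallest value with no fractional bits
//     dst = copysign(int2fp(fp2int(src, FRM), FRM), src);
//   else
//     dst = src;           // already integral (or NaN/inf), pass through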
18326
18327MachineBasicBlock *
18328RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18329 MachineBasicBlock *BB) const {
18330 switch (MI.getOpcode()) {
18331 default:
18332 llvm_unreachable("Unexpected instr type to insert");
18333 case RISCV::ReadCounterWide:
18334 assert(!Subtarget.is64Bit() &&
18335 "ReadCounterWide is only to be used on riscv32");
18336 return emitReadCounterWidePseudo(MI, BB);
18337 case RISCV::Select_GPR_Using_CC_GPR:
18338 case RISCV::Select_GPR_Using_CC_Imm:
18339 case RISCV::Select_FPR16_Using_CC_GPR:
18340 case RISCV::Select_FPR16INX_Using_CC_GPR:
18341 case RISCV::Select_FPR32_Using_CC_GPR:
18342 case RISCV::Select_FPR32INX_Using_CC_GPR:
18343 case RISCV::Select_FPR64_Using_CC_GPR:
18344 case RISCV::Select_FPR64INX_Using_CC_GPR:
18345 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18346 return emitSelectPseudo(MI, BB, Subtarget);
18347 case RISCV::BuildPairF64Pseudo:
18348 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18349 case RISCV::SplitF64Pseudo:
18350 return emitSplitF64Pseudo(MI, BB, Subtarget);
18351 case RISCV::PseudoQuietFLE_H:
18352 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18353 case RISCV::PseudoQuietFLE_H_INX:
18354 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18355 case RISCV::PseudoQuietFLT_H:
18356 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18357 case RISCV::PseudoQuietFLT_H_INX:
18358 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18359 case RISCV::PseudoQuietFLE_S:
18360 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18361 case RISCV::PseudoQuietFLE_S_INX:
18362 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18363 case RISCV::PseudoQuietFLT_S:
18364 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18365 case RISCV::PseudoQuietFLT_S_INX:
18366 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18367 case RISCV::PseudoQuietFLE_D:
18368 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18369 case RISCV::PseudoQuietFLE_D_INX:
18370 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18371 case RISCV::PseudoQuietFLE_D_IN32X:
18372 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18373 Subtarget);
18374 case RISCV::PseudoQuietFLT_D:
18375 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18376 case RISCV::PseudoQuietFLT_D_INX:
18377 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18378 case RISCV::PseudoQuietFLT_D_IN32X:
18379 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18380 Subtarget);
18381
18382 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18383 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18384 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18385 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18386 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18387 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18388 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18389 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18390 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18391 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18392 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18393 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18394 case RISCV::PseudoFROUND_H:
18395 case RISCV::PseudoFROUND_H_INX:
18396 case RISCV::PseudoFROUND_S:
18397 case RISCV::PseudoFROUND_S_INX:
18398 case RISCV::PseudoFROUND_D:
18399 case RISCV::PseudoFROUND_D_INX:
18400 case RISCV::PseudoFROUND_D_IN32X:
18401 return emitFROUND(MI, BB, Subtarget);
18402 case TargetOpcode::STATEPOINT:
18403 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
18404    // while the jal call instruction (to which the statepoint is eventually
18405    // lowered) has an implicit def. This def is early-clobber as it will be set at
18406 // the moment of the call and earlier than any use is read.
18407 // Add this implicit dead def here as a workaround.
18408 MI.addOperand(*MI.getMF(),
18409                  MachineOperand::CreateReg(
18410 RISCV::X1, /*isDef*/ true,
18411 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18412 /*isUndef*/ false, /*isEarlyClobber*/ true));
18413 [[fallthrough]];
18414 case TargetOpcode::STACKMAP:
18415 case TargetOpcode::PATCHPOINT:
18416 if (!Subtarget.is64Bit())
18417 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18418 "supported on 64-bit targets");
18419 return emitPatchPoint(MI, BB);
18420 }
18421}
18422
18423void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18424 SDNode *Node) const {
18425 // Add FRM dependency to any instructions with dynamic rounding mode.
18426 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18427 if (Idx < 0) {
18428 // Vector pseudos have FRM index indicated by TSFlags.
18429 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18430 if (Idx < 0)
18431 return;
18432 }
18433 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18434 return;
18435 // If the instruction already reads FRM, don't add another read.
18436 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18437 return;
18438 MI.addOperand(
18439 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18440}
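// Example: an FADD_S whose frm operand is DYN gets an implicit use of the FRM
// CSR appended here, so later passes cannot reorder it across an fsrm that
// changes the dynamic rounding mode.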
18441
18442// Calling Convention Implementation.
18443// The expectations for frontend ABI lowering vary from target to target.
18444// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18445// details, but this is a longer term goal. For now, we simply try to keep the
18446// role of the frontend as simple and well-defined as possible. The rules can
18447// be summarised as:
18448// * Never split up large scalar arguments. We handle them here.
18449// * If a hardfloat calling convention is being used, and the struct may be
18450// passed in a pair of registers (fp+fp, int+fp), and both registers are
18451// available, then pass as two separate arguments. If either the GPRs or FPRs
18452// are exhausted, then pass according to the rule below.
18453// * If a struct could never be passed in registers or directly in a stack
18454// slot (as it is larger than 2*XLEN and the floating point rules don't
18455// apply), then pass it using a pointer with the byval attribute.
18456// * If a struct is less than 2*XLEN, then coerce to either a two-element
18457// word-sized array or a 2*XLEN scalar (depending on alignment).
18458// * The frontend can determine whether a struct is returned by reference or
18459// not based on its size and fields. If it will be returned by reference, the
18460// frontend must modify the prototype so a pointer with the sret annotation is
18461// passed as the first argument. This is not necessary for large scalar
18462// returns.
18463// * Struct return values and varargs should be coerced to structs containing
18464// register-size fields in the same situations they would be for fixed
18465// arguments.
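// For instance (illustrative, hard-float LP64D): struct S { double d; int32_t i; }
// may be passed as two separate arguments, d in an FPR and i in a GPR, as long
// as both kinds of registers are still available; once the FPRs or GPRs are
// exhausted it falls back to the integer rules above.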
18466
18467static const MCPhysReg ArgFPR16s[] = {
18468 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18469 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18470};
18471static const MCPhysReg ArgFPR32s[] = {
18472 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18473 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18474};
18475static const MCPhysReg ArgFPR64s[] = {
18476 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18477 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18478};
18479// This is an interim calling convention and it may be changed in the future.
18480static const MCPhysReg ArgVRs[] = {
18481 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18482 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18483 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18484static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18485 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18486 RISCV::V20M2, RISCV::V22M2};
18487static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18488 RISCV::V20M4};
18489static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18490
18491static ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI) {
18492 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18493 // the ILP32E ABI.
18494 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18495 RISCV::X13, RISCV::X14, RISCV::X15,
18496 RISCV::X16, RISCV::X17};
18497  // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18498 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18499 RISCV::X13, RISCV::X14, RISCV::X15};
18500
18501 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18502 return ArrayRef(ArgEGPRs);
18503
18504 return ArrayRef(ArgIGPRs);
18505}
18506
18507static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18508  // The GPRs used for passing arguments in the FastCC. X5 and X6 might be used
18509  // for the save-restore libcall, so we don't use them for arguments.
18510 // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
18511 static const MCPhysReg FastCCIGPRs[] = {
18512 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
18513 RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};
18514
18515  // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
18516 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18517 RISCV::X13, RISCV::X14, RISCV::X15};
18518
18519 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18520 return ArrayRef(FastCCEGPRs);
18521
18522 return ArrayRef(FastCCIGPRs);
18523}
18524
18525// Pass a 2*XLEN argument that has been split into two XLEN values through
18526// registers or the stack as necessary.
18527static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18528 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18529 MVT ValVT2, MVT LocVT2,
18530 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18531 unsigned XLenInBytes = XLen / 8;
18532 const RISCVSubtarget &STI =
18533      State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18534  ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(STI.getTargetABI());
18535
18536 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18537 // At least one half can be passed via register.
18538 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18539 VA1.getLocVT(), CCValAssign::Full));
18540 } else {
18541 // Both halves must be passed on the stack, with proper alignment.
18542 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18543 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18544 Align StackAlign(XLenInBytes);
18545 if (!EABI || XLen != 32)
18546 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18547 State.addLoc(
18548        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18549 State.AllocateStack(XLenInBytes, StackAlign),
18550 VA1.getLocVT(), CCValAssign::Full));
18551    State.addLoc(CCValAssign::getMem(
18552 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18553 LocVT2, CCValAssign::Full));
18554 return false;
18555 }
18556
18557 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18558 // The second half can also be passed via register.
18559 State.addLoc(
18560 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18561 } else {
18562 // The second half is passed via the stack, without additional alignment.
18563    State.addLoc(CCValAssign::getMem(
18564 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18565 LocVT2, CCValAssign::Full));
18566 }
18567
18568 return false;
18569}
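// Worked example (ILP32, an i64 split into two i32 halves): with two free GPRs
// both halves travel in registers; with only one GPR left, the low half takes
// it and the high half goes to a 4-byte-aligned stack slot; with no GPRs left,
// both halves go on the stack, the first at the argument's original (8-byte)
// alignment except under ILP32E, which keeps 4-byte alignment.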
18570
18571// Implements the RISC-V calling convention. Returns true upon failure.
18572bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18573 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18574 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18575 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18576 RVVArgDispatcher &RVVDispatcher) {
18577 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18578 assert(XLen == 32 || XLen == 64);
18579 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18580
18581 // Static chain parameter must not be passed in normal argument registers,
18582 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18583 if (ArgFlags.isNest()) {
18584 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18585 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18586 return false;
18587 }
18588 }
18589
18590  // Any return value split into more than two values can't be returned
18591 // directly. Vectors are returned via the available vector registers.
18592 if (!LocVT.isVector() && IsRet && ValNo > 1)
18593 return true;
18594
18595 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18596 // variadic argument, or if no F16/F32 argument registers are available.
18597 bool UseGPRForF16_F32 = true;
18598 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18599 // variadic argument, or if no F64 argument registers are available.
18600 bool UseGPRForF64 = true;
18601
18602 switch (ABI) {
18603 default:
18604 llvm_unreachable("Unexpected ABI");
18605  case RISCVABI::ABI_ILP32:
18606  case RISCVABI::ABI_ILP32E:
18607 case RISCVABI::ABI_LP64:
18608  case RISCVABI::ABI_LP64E:
18609 break;
18610  case RISCVABI::ABI_ILP32F:
18611  case RISCVABI::ABI_LP64F:
18612 UseGPRForF16_F32 = !IsFixed;
18613 break;
18614  case RISCVABI::ABI_ILP32D:
18615  case RISCVABI::ABI_LP64D:
18616 UseGPRForF16_F32 = !IsFixed;
18617 UseGPRForF64 = !IsFixed;
18618 break;
18619 }
18620
18621 // FPR16, FPR32, and FPR64 alias each other.
18622 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18623 UseGPRForF16_F32 = true;
18624 UseGPRForF64 = true;
18625 }
18626
18627 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18628 // similar local variables rather than directly checking against the target
18629 // ABI.
18630
18631 if (UseGPRForF16_F32 &&
18632 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18633 LocVT = XLenVT;
18634 LocInfo = CCValAssign::BCvt;
18635 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18636 LocVT = MVT::i64;
18637 LocInfo = CCValAssign::BCvt;
18638 }
18639
18640  ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(ABI);
18641
18642 // If this is a variadic argument, the RISC-V calling convention requires
18643 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18644 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18645 // be used regardless of whether the original argument was split during
18646 // legalisation or not. The argument will not be passed by registers if the
18647 // original type is larger than 2*XLEN, so the register alignment rule does
18648 // not apply.
18649 // TODO: To be compatible with GCC's behaviors, we don't align registers
18650 // currently if we are using ILP32E calling convention. This behavior may be
18651 // changed when RV32E/ILP32E is ratified.
18652 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18653 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18654 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18655 ABI != RISCVABI::ABI_ILP32E) {
18656 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18657 // Skip 'odd' register if necessary.
18658 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18659 State.AllocateReg(ArgGPRs);
18660 }
18661
18662 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18663 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18664 State.getPendingArgFlags();
18665
18666 assert(PendingLocs.size() == PendingArgFlags.size() &&
18667 "PendingLocs and PendingArgFlags out of sync");
18668
18669 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18670 // registers are exhausted.
18671 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18672 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18673 // Depending on available argument GPRS, f64 may be passed in a pair of
18674 // GPRs, split between a GPR and the stack, or passed completely on the
18675 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18676 // cases.
18677 Register Reg = State.AllocateReg(ArgGPRs);
18678 if (!Reg) {
18679 unsigned StackOffset = State.AllocateStack(8, Align(8));
18680 State.addLoc(
18681 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18682 return false;
18683 }
18684 LocVT = MVT::i32;
18685 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18686 Register HiReg = State.AllocateReg(ArgGPRs);
18687 if (HiReg) {
18688 State.addLoc(
18689 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18690 } else {
18691 unsigned StackOffset = State.AllocateStack(4, Align(4));
18692 State.addLoc(
18693 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18694 }
18695 return false;
18696 }
18697
18698 // Fixed-length vectors are located in the corresponding scalable-vector
18699 // container types.
18700 if (ValVT.isFixedLengthVector())
18701 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18702
18703 // Split arguments might be passed indirectly, so keep track of the pending
18704 // values. Split vectors are passed via a mix of registers and indirectly, so
18705 // treat them as we would any other argument.
18706 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18707 LocVT = XLenVT;
18708 LocInfo = CCValAssign::Indirect;
18709 PendingLocs.push_back(
18710 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18711 PendingArgFlags.push_back(ArgFlags);
18712 if (!ArgFlags.isSplitEnd()) {
18713 return false;
18714 }
18715 }
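  // For example, an i128 argument on RV64 arrives as two i64 pieces; both are
  // recorded as pending and resolved directly by CC_RISCVAssign2XLen below,
  // while splits with more than two pieces fall through to the indirect path
  // handled later.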
18716
18717 // If the split argument only had two elements, it should be passed directly
18718 // in registers or on the stack.
18719 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18720 PendingLocs.size() <= 2) {
18721 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18722 // Apply the normal calling convention rules to the first half of the
18723 // split argument.
18724 CCValAssign VA = PendingLocs[0];
18725 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18726 PendingLocs.clear();
18727 PendingArgFlags.clear();
18728 return CC_RISCVAssign2XLen(
18729 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18730 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18731 }
18732
18733 // Allocate to a register if possible, or else a stack slot.
18734 Register Reg;
18735 unsigned StoreSizeBytes = XLen / 8;
18736 Align StackAlign = Align(XLen / 8);
18737
18738 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18739 Reg = State.AllocateReg(ArgFPR16s);
18740 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18741 Reg = State.AllocateReg(ArgFPR32s);
18742 else if (ValVT == MVT::f64 && !UseGPRForF64)
18743 Reg = State.AllocateReg(ArgFPR64s);
18744 else if (ValVT.isVector()) {
18745 Reg = RVVDispatcher.getNextPhysReg();
18746 if (!Reg) {
18747 // For return values, the vector must be passed fully via registers or
18748 // via the stack.
18749 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18750 // but we're using all of them.
18751 if (IsRet)
18752 return true;
18753 // Try using a GPR to pass the address
18754 if ((Reg = State.AllocateReg(ArgGPRs))) {
18755 LocVT = XLenVT;
18756 LocInfo = CCValAssign::Indirect;
18757 } else if (ValVT.isScalableVector()) {
18758 LocVT = XLenVT;
18759 LocInfo = CCValAssign::Indirect;
18760 } else {
18761 // Pass fixed-length vectors on the stack.
18762 LocVT = ValVT;
18763 StoreSizeBytes = ValVT.getStoreSize();
18764 // Align vectors to their element sizes, being careful for vXi1
18765 // vectors.
18766 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18767 }
18768 }
18769 } else {
18770 Reg = State.AllocateReg(ArgGPRs);
18771 }
18772
18773 unsigned StackOffset =
18774 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18775
18776 // If we reach this point and PendingLocs is non-empty, we must be at the
18777 // end of a split argument that must be passed indirectly.
18778 if (!PendingLocs.empty()) {
18779 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18780 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18781
18782 for (auto &It : PendingLocs) {
18783 if (Reg)
18784 It.convertToReg(Reg);
18785 else
18786 It.convertToMem(StackOffset);
18787 State.addLoc(It);
18788 }
18789 PendingLocs.clear();
18790 PendingArgFlags.clear();
18791 return false;
18792 }
18793
18794 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18795 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18796 "Expected an XLenVT or vector types at this stage");
18797
18798 if (Reg) {
18799 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18800 return false;
18801 }
18802
18803 // When a scalar floating-point value is passed on the stack, no
18804 // bit-conversion is needed.
18805 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18806 assert(!ValVT.isVector());
18807 LocVT = ValVT;
18808 LocInfo = CCValAssign::Full;
18809 }
18810 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18811 return false;
18812}
18813
18814template <typename ArgTy>
18815static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18816 for (const auto &ArgIdx : enumerate(Args)) {
18817 MVT ArgVT = ArgIdx.value().VT;
18818 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18819 return ArgIdx.index();
18820 }
18821 return std::nullopt;
18822}
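// preAssignMask returns the index of the first vXi1 (mask-typed) argument, if
// any, so the RVVArgDispatcher can reserve the mask register (v0) for it
// before dispatching the remaining vector arguments to v8 and up.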
18823
18824void RISCVTargetLowering::analyzeInputArgs(
18825 MachineFunction &MF, CCState &CCInfo,
18826 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18827 RISCVCCAssignFn Fn) const {
18828 unsigned NumArgs = Ins.size();
18829  FunctionType *FType = MF.getFunction().getFunctionType();
18830
18831 RVVArgDispatcher Dispatcher;
18832 if (IsRet) {
18833 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18834 } else {
18835 SmallVector<Type *, 4> TypeList;
18836 for (const Argument &Arg : MF.getFunction().args())
18837 TypeList.push_back(Arg.getType());
18838 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18839 }
18840
18841 for (unsigned i = 0; i != NumArgs; ++i) {
18842 MVT ArgVT = Ins[i].VT;
18843 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18844
18845 Type *ArgTy = nullptr;
18846 if (IsRet)
18847 ArgTy = FType->getReturnType();
18848 else if (Ins[i].isOrigArg())
18849 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18850
18851    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18852    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18853 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18854 Dispatcher)) {
18855 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18856 << ArgVT << '\n');
18857 llvm_unreachable(nullptr);
18858 }
18859 }
18860}
18861
18862void RISCVTargetLowering::analyzeOutputArgs(
18863 MachineFunction &MF, CCState &CCInfo,
18864 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18865 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18866 unsigned NumArgs = Outs.size();
18867
18868 SmallVector<Type *, 4> TypeList;
18869 if (IsRet)
18870 TypeList.push_back(MF.getFunction().getReturnType());
18871 else if (CLI)
18872 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18873 TypeList.push_back(Arg.Ty);
18874 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18875
18876 for (unsigned i = 0; i != NumArgs; i++) {
18877 MVT ArgVT = Outs[i].VT;
18878 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18879 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18880
18881    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18882    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18883 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18884 Dispatcher)) {
18885 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18886 << ArgVT << "\n");
18887 llvm_unreachable(nullptr);
18888 }
18889 }
18890}
18891
18892// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18893// values.
18894 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18895                                    const CCValAssign &VA, const SDLoc &DL,
18896 const RISCVSubtarget &Subtarget) {
18897 switch (VA.getLocInfo()) {
18898 default:
18899 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18900 case CCValAssign::Full:
18901    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18902      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18903 break;
18904 case CCValAssign::BCvt:
18905 if (VA.getLocVT().isInteger() &&
18906 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18907 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18908 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18909 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18910 } else {
18911 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18912 }
18913 break;
18914 }
18915 return Val;
18916}
18917
18918// The caller is responsible for loading the full value if the argument is
18919// passed with CCValAssign::Indirect.
18920 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18921                                 const CCValAssign &VA, const SDLoc &DL,
18922 const ISD::InputArg &In,
18923 const RISCVTargetLowering &TLI) {
18924  MachineFunction &MF = DAG.getMachineFunction();
18925  MachineRegisterInfo &RegInfo = MF.getRegInfo();
18926  EVT LocVT = VA.getLocVT();
18927 SDValue Val;
18928 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18929 Register VReg = RegInfo.createVirtualRegister(RC);
18930 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18931 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18932
18933 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18934 if (In.isOrigArg()) {
18935 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18936 if (OrigArg->getType()->isIntegerTy()) {
18937 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18938 // An input zero extended from i31 can also be considered sign extended.
18939 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18940 (BitWidth < 32 && In.Flags.isZExt())) {
18941        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18942        RVFI->addSExt32Register(VReg);
18943 }
18944 }
18945 }
18946
18947  if (VA.getLocInfo() == CCValAssign::Indirect)
18948    return Val;
18949
18950 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18951}
18952
18953 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18954                                    const CCValAssign &VA, const SDLoc &DL,
18955 const RISCVSubtarget &Subtarget) {
18956 EVT LocVT = VA.getLocVT();
18957
18958 switch (VA.getLocInfo()) {
18959 default:
18960 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18961 case CCValAssign::Full:
18962 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18963 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18964 break;
18965 case CCValAssign::BCvt:
18966 if (LocVT.isInteger() &&
18967 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18968 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18969 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18970 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18971 } else {
18972 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18973 }
18974 break;
18975 }
18976 return Val;
18977}
18978
18979// The caller is responsible for loading the full value if the argument is
18980// passed with CCValAssign::Indirect.
18981 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18982                                 const CCValAssign &VA, const SDLoc &DL) {
18983  MachineFunction &MF = DAG.getMachineFunction();
18984  MachineFrameInfo &MFI = MF.getFrameInfo();
18985 EVT LocVT = VA.getLocVT();
18986 EVT ValVT = VA.getValVT();
18987  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
18988  if (ValVT.isScalableVector()) {
18989 // When the value is a scalable vector, we save the pointer which points to
18990 // the scalable vector value in the stack. The ValVT will be the pointer
18991 // type, instead of the scalable vector type.
18992 ValVT = LocVT;
18993 }
18994 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18995 /*IsImmutable=*/true);
18996 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18997 SDValue Val;
18998
18999 ISD::LoadExtType ExtType;
19000 switch (VA.getLocInfo()) {
19001 default:
19002 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19003 case CCValAssign::Full:
19004  case CCValAssign::Indirect:
19005  case CCValAssign::BCvt:
19006 ExtType = ISD::NON_EXTLOAD;
19007 break;
19008 }
19009 Val = DAG.getExtLoad(
19010 ExtType, DL, LocVT, Chain, FIN,
19011      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
19012  return Val;
19013}
19014
19015 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19016                                        const CCValAssign &VA,
19017 const CCValAssign &HiVA,
19018 const SDLoc &DL) {
19019 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19020 "Unexpected VA");
19021  MachineFunction &MF = DAG.getMachineFunction();
19022  MachineFrameInfo &MFI = MF.getFrameInfo();
19023  MachineRegisterInfo &RegInfo = MF.getRegInfo();
19024
19025 assert(VA.isRegLoc() && "Expected register VA assignment");
19026
19027 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19028 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19029 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19030 SDValue Hi;
19031 if (HiVA.isMemLoc()) {
19032 // Second half of f64 is passed on the stack.
19033 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19034 /*IsImmutable=*/true);
19035 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19036 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19037                     MachinePointerInfo::getFixedStack(MF, FI));
19038  } else {
19039 // Second half of f64 is passed in another GPR.
19040 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19041 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19042 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19043 }
19044 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19045}
19046
19047 // FastCC yields less than a 1% performance improvement on some particular
19048 // benchmarks, but it may theoretically benefit other cases.
19049 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
19050 unsigned ValNo, MVT ValVT, MVT LocVT,
19051 CCValAssign::LocInfo LocInfo,
19052 ISD::ArgFlagsTy ArgFlags, CCState &State,
19053 bool IsFixed, bool IsRet, Type *OrigTy,
19054 const RISCVTargetLowering &TLI,
19055 RVVArgDispatcher &RVVDispatcher) {
19056 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19057 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19058 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19059 return false;
19060 }
19061 }
19062
19063 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
19064
19065 if (LocVT == MVT::f16 &&
19066 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
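    // Argument registers fa0-fa7 come first, followed by the caller-saved
    // temporaries ft0-ft7 and ft8-ft11; the f32 and f64 lists below follow the
    // same order.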
19067 static const MCPhysReg FPR16List[] = {
19068 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
19069 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
19070 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
19071 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
19072 if (unsigned Reg = State.AllocateReg(FPR16List)) {
19073 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19074 return false;
19075 }
19076 }
19077
19078 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19079 static const MCPhysReg FPR32List[] = {
19080 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
19081 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
19082 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
19083 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
19084 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19085 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19086 return false;
19087 }
19088 }
19089
19090 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19091 static const MCPhysReg FPR64List[] = {
19092 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
19093 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
19094 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
19095 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
19096 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19097 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19098 return false;
19099 }
19100 }
19101
19102 // Check if there is an available GPR before hitting the stack.
19103 if ((LocVT == MVT::f16 &&
19104 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
19105 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19106 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
19107 Subtarget.hasStdExtZdinx())) {
19108 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19109 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19110 return false;
19111 }
19112 }
19113
19114 if (LocVT == MVT::f16) {
19115 unsigned Offset2 = State.AllocateStack(2, Align(2));
19116 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
19117 return false;
19118 }
19119
19120 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
19121 unsigned Offset4 = State.AllocateStack(4, Align(4));
19122 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
19123 return false;
19124 }
19125
19126 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
19127 unsigned Offset5 = State.AllocateStack(8, Align(8));
19128 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
19129 return false;
19130 }
19131
19132 if (LocVT.isVector()) {
19133 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
19134 if (AllocatedVReg) {
19135 // Fixed-length vectors are located in the corresponding scalable-vector
19136 // container types.
19137 if (ValVT.isFixedLengthVector())
19138 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19139 State.addLoc(
19140 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
19141 } else {
19142 // Try and pass the address via a "fast" GPR.
19143 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19144 LocInfo = CCValAssign::Indirect;
19145 LocVT = TLI.getSubtarget().getXLenVT();
19146 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
19147 } else if (ValVT.isFixedLengthVector()) {
19148 auto StackAlign =
19149          MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19150      unsigned StackOffset =
19151 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
19152 State.addLoc(
19153 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19154 } else {
19155 // Can't pass scalable vectors on the stack.
19156 return true;
19157 }
19158 }
19159
19160 return false;
19161 }
19162
19163 return true; // CC didn't match.
19164}
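// In short, FastCC extends the standard convention above by drawing on
// additional caller-saved FPRs (and, with the Z*inx extensions, extra GPRs)
// for arguments before anything spills to the stack.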
19165
19166bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
19167 CCValAssign::LocInfo LocInfo,
19168 ISD::ArgFlagsTy ArgFlags, CCState &State) {
19169 if (ArgFlags.isNest()) {
19170    report_fatal_error(
19171        "Attribute 'nest' is not supported in GHC calling convention");
19172 }
19173
19174 static const MCPhysReg GPRList[] = {
19175 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
19176 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
19177
19178 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19179 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
19180 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
19181 if (unsigned Reg = State.AllocateReg(GPRList)) {
19182 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19183 return false;
19184 }
19185 }
19186
19187 const RISCVSubtarget &Subtarget =
19188      State.getMachineFunction().getSubtarget<RISCVSubtarget>();
19189
19190 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19191 // Pass in STG registers: F1, ..., F6
19192 // fs0 ... fs5
19193 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
19194 RISCV::F18_F, RISCV::F19_F,
19195 RISCV::F20_F, RISCV::F21_F};
19196 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19197 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19198 return false;
19199 }
19200 }
19201
19202 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19203 // Pass in STG registers: D1, ..., D6
19204 // fs6 ... fs11
19205 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19206 RISCV::F24_D, RISCV::F25_D,
19207 RISCV::F26_D, RISCV::F27_D};
19208 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19209 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19210 return false;
19211 }
19212 }
19213
19214 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19215 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19216 Subtarget.is64Bit())) {
19217 if (unsigned Reg = State.AllocateReg(GPRList)) {
19218 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19219 return false;
19220 }
19221 }
19222
19223 report_fatal_error("No registers left in GHC calling convention");
19224 return true;
19225}
19226
19227// Transform physical registers into virtual registers.
19228 SDValue RISCVTargetLowering::LowerFormalArguments(
19229     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19230 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19231 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19232
19233  MachineFunction &MF = DAG.getMachineFunction();
19234
19235 switch (CallConv) {
19236 default:
19237 report_fatal_error("Unsupported calling convention");
19238 case CallingConv::C:
19239 case CallingConv::Fast:
19240  case CallingConv::SPIR_KERNEL:
19241  case CallingConv::GRAAL:
19242  case CallingConv::RISCV_VectorCall:
19243    break;
19244 case CallingConv::GHC:
19245 if (Subtarget.hasStdExtE())
19246 report_fatal_error("GHC calling convention is not supported on RVE!");
19247 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19248 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19249 "(Zdinx/D) instruction set extensions");
19250 }
19251
19252 const Function &Func = MF.getFunction();
19253 if (Func.hasFnAttribute("interrupt")) {
19254 if (!Func.arg_empty())
19255      report_fatal_error(
19256          "Functions with the interrupt attribute cannot have arguments!");
19257
19258 StringRef Kind =
19259 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19260
19261 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19262      report_fatal_error(
19263          "Function interrupt attribute argument not supported!");
19264 }
19265
19266 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19267 MVT XLenVT = Subtarget.getXLenVT();
19268 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19269  // Used with varargs to accumulate store chains.
19270 std::vector<SDValue> OutChains;
19271
19272 // Assign locations to all of the incoming arguments.
19273  SmallVector<CCValAssign, 16> ArgLocs;
19274  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19275
19276 if (CallConv == CallingConv::GHC)
19277    CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
19278  else
19279 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19280                     CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19281                                                   : RISCV::CC_RISCV);
19282
19283 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19284 CCValAssign &VA = ArgLocs[i];
19285 SDValue ArgValue;
19286 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19287 // case.
19288 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19289 assert(VA.needsCustom());
19290 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19291 } else if (VA.isRegLoc())
19292 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19293 else
19294 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19295
19296 if (VA.getLocInfo() == CCValAssign::Indirect) {
19297 // If the original argument was split and passed by reference (e.g. i128
19298 // on RV32), we need to load all parts of it here (using the same
19299 // address). Vectors may be partly split to registers and partly to the
19300 // stack, in which case the base address is partly offset and subsequent
19301 // stores are relative to that.
19302 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19303                                   MachinePointerInfo()));
19304      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19305 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19306 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19307 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19308 CCValAssign &PartVA = ArgLocs[i + 1];
19309 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19310 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19311 if (PartVA.getValVT().isScalableVector())
19312 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19313 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19314 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19315                                     MachinePointerInfo()));
19316        ++i;
19317 ++InsIdx;
19318 }
19319 continue;
19320 }
19321 InVals.push_back(ArgValue);
19322 }
19323
19324 if (any_of(ArgLocs,
19325 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19326 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19327
19328 if (IsVarArg) {
19329 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19330 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19331 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19332 MachineFrameInfo &MFI = MF.getFrameInfo();
19333 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19334    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19335
19336 // Size of the vararg save area. For now, the varargs save area is either
19337 // zero or large enough to hold a0-a7.
19338 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19339 int FI;
19340
19341 // If all registers are allocated, then all varargs must be passed on the
19342 // stack and we don't need to save any argregs.
19343 if (VarArgsSaveSize == 0) {
19344 int VaArgOffset = CCInfo.getStackSize();
19345 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19346 } else {
19347 int VaArgOffset = -VarArgsSaveSize;
19348 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19349
19350      // If saving an odd number of registers, create an extra stack slot to
19351      // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19352      // that offsets to even-numbered registers remain 2*XLEN-aligned.
19353 if (Idx % 2) {
19354        MFI.CreateFixedObject(
19355            XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19356 VarArgsSaveSize += XLenInBytes;
19357 }
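      // For example, on RV64 with fixed arguments occupying a0-a2 (Idx == 3),
      // a3-a7 are saved (40 bytes); the extra 8-byte slot above grows the save
      // area to 48 bytes so even-numbered registers keep 16-byte-aligned
      // offsets.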
19358
19359 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19360
19361 // Copy the integer registers that may have been used for passing varargs
19362 // to the vararg save area.
19363 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19364 const Register Reg = RegInfo.createVirtualRegister(RC);
19365 RegInfo.addLiveIn(ArgRegs[I], Reg);
19366 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19367 SDValue Store = DAG.getStore(
19368 Chain, DL, ArgValue, FIN,
19369 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19370 OutChains.push_back(Store);
19371 FIN =
19372 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19373 }
19374 }
19375
19376    // Record the frame index of the first variable argument,
19377    // which is needed when lowering VASTART.
19378 RVFI->setVarArgsFrameIndex(FI);
19379 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19380 }
19381
19382 // All stores are grouped in one node to allow the matching between
19383 // the size of Ins and InVals. This only happens for vararg functions.
19384 if (!OutChains.empty()) {
19385 OutChains.push_back(Chain);
19386 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19387 }
19388
19389 return Chain;
19390}
19391
19392/// isEligibleForTailCallOptimization - Check whether the call is eligible
19393/// for tail call optimization.
19394/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19395bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19396 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19397 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19398
19399 auto CalleeCC = CLI.CallConv;
19400 auto &Outs = CLI.Outs;
19401 auto &Caller = MF.getFunction();
19402 auto CallerCC = Caller.getCallingConv();
19403
19404 // Exception-handling functions need a special set of instructions to
19405 // indicate a return to the hardware. Tail-calling another function would
19406 // probably break this.
19407 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19408 // should be expanded as new function attributes are introduced.
19409 if (Caller.hasFnAttribute("interrupt"))
19410 return false;
19411
19412 // Do not tail call opt if the stack is used to pass parameters.
19413 if (CCInfo.getStackSize() != 0)
19414 return false;
19415
19416 // Do not tail call opt if any parameters need to be passed indirectly.
19417 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19418 // passed indirectly. So the address of the value will be passed in a
19419 // register, or if not available, then the address is put on the stack. In
19420 // order to pass indirectly, space on the stack often needs to be allocated
19421  // in order to store the value. In this case the CCInfo.getStackSize() != 0
19422  // check is not enough and we need to check whether any CCValAssign in
19423  // ArgLocs is passed CCValAssign::Indirect.
19424 for (auto &VA : ArgLocs)
19425 if (VA.getLocInfo() == CCValAssign::Indirect)
19426 return false;
19427
19428 // Do not tail call opt if either caller or callee uses struct return
19429 // semantics.
19430 auto IsCallerStructRet = Caller.hasStructRetAttr();
19431 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19432 if (IsCallerStructRet || IsCalleeStructRet)
19433 return false;
19434
19435 // The callee has to preserve all registers the caller needs to preserve.
19436 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19437 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19438 if (CalleeCC != CallerCC) {
19439 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19440 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19441 return false;
19442 }
19443
19444 // Byval parameters hand the function a pointer directly into the stack area
19445 // we want to reuse during a tail call. Working around this *is* possible
19446 // but less efficient and uglier in LowerCall.
19447 for (auto &Arg : Outs)
19448 if (Arg.Flags.isByVal())
19449 return false;
19450
19451 return true;
19452}
19453
19454 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19455   return DAG.getDataLayout().getPrefTypeAlign(
19456 VT.getTypeForEVT(*DAG.getContext()));
19457}
19458
19459// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19460// and output parameter nodes.
19461 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19462                                        SmallVectorImpl<SDValue> &InVals) const {
19463 SelectionDAG &DAG = CLI.DAG;
19464 SDLoc &DL = CLI.DL;
19465  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19466  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19467  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19468  SDValue Chain = CLI.Chain;
19469 SDValue Callee = CLI.Callee;
19470 bool &IsTailCall = CLI.IsTailCall;
19471 CallingConv::ID CallConv = CLI.CallConv;
19472 bool IsVarArg = CLI.IsVarArg;
19473 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19474 MVT XLenVT = Subtarget.getXLenVT();
19475
19476  MachineFunction &MF = DAG.getMachineFunction();
19477
19478 // Analyze the operands of the call, assigning locations to each operand.
19479  SmallVector<CCValAssign, 16> ArgLocs;
19480  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19481
19482 if (CallConv == CallingConv::GHC) {
19483 if (Subtarget.hasStdExtE())
19484 report_fatal_error("GHC calling convention is not supported on RVE!");
19485    ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
19486  } else
19487 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19488                      CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19489                                                    : RISCV::CC_RISCV);
19490
19491 // Check if it's really possible to do a tail call.
19492 if (IsTailCall)
19493 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19494
19495 if (IsTailCall)
19496 ++NumTailCalls;
19497 else if (CLI.CB && CLI.CB->isMustTailCall())
19498 report_fatal_error("failed to perform tail call elimination on a call "
19499 "site marked musttail");
19500
19501 // Get a count of how many bytes are to be pushed on the stack.
19502 unsigned NumBytes = ArgCCInfo.getStackSize();
19503
19504 // Create local copies for byval args
19505 SmallVector<SDValue, 8> ByValArgs;
19506 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19507 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19508 if (!Flags.isByVal())
19509 continue;
19510
19511 SDValue Arg = OutVals[i];
19512 unsigned Size = Flags.getByValSize();
19513 Align Alignment = Flags.getNonZeroByValAlign();
19514
19515 int FI =
19516 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19517 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19518 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19519
19520 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19521 /*IsVolatile=*/false,
19522 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
19523                          MachinePointerInfo(), MachinePointerInfo());
19524    ByValArgs.push_back(FIPtr);
19525 }
19526
19527 if (!IsTailCall)
19528 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19529
19530 // Copy argument values to their designated locations.
19531  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19532  SmallVector<SDValue, 8> MemOpChains;
19533 SDValue StackPtr;
19534 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19535 ++i, ++OutIdx) {
19536 CCValAssign &VA = ArgLocs[i];
19537 SDValue ArgValue = OutVals[OutIdx];
19538 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19539
19540 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19541 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19542 assert(VA.isRegLoc() && "Expected register VA assignment");
19543 assert(VA.needsCustom());
19544 SDValue SplitF64 = DAG.getNode(
19545 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19546 SDValue Lo = SplitF64.getValue(0);
19547 SDValue Hi = SplitF64.getValue(1);
19548
19549 Register RegLo = VA.getLocReg();
19550 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19551
19552 // Get the CCValAssign for the Hi part.
19553 CCValAssign &HiVA = ArgLocs[++i];
19554
19555 if (HiVA.isMemLoc()) {
19556 // Second half of f64 is passed on the stack.
19557 if (!StackPtr.getNode())
19558 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19559        SDValue Address =
19560            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19561 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19562 // Emit the store.
19563 MemOpChains.push_back(
19564 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19565 } else {
19566 // Second half of f64 is passed in another GPR.
19567 Register RegHigh = HiVA.getLocReg();
19568 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19569 }
19570 continue;
19571 }
19572
19573 // Promote the value if needed.
19574 // For now, only handle fully promoted and indirect arguments.
19575 if (VA.getLocInfo() == CCValAssign::Indirect) {
19576 // Store the argument in a stack slot and pass its address.
19577 Align StackAlign =
19578 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19579 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19580 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19581 // If the original argument was split (e.g. i128), we need
19582 // to store the required parts of it here (and pass just one address).
19583 // Vectors may be partly split to registers and partly to the stack, in
19584 // which case the base address is partly offset and subsequent stores are
19585 // relative to that.
19586 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19587 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19588 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19589 // Calculate the total size to store. We don't have access to what we're
19590 // actually storing other than performing the loop and collecting the
19591 // info.
19592      SmallVector<std::pair<SDValue, SDValue>> Parts;
19593      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19594 SDValue PartValue = OutVals[OutIdx + 1];
19595 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19596 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19597 EVT PartVT = PartValue.getValueType();
19598 if (PartVT.isScalableVector())
19599 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19600 StoredSize += PartVT.getStoreSize();
19601 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19602 Parts.push_back(std::make_pair(PartValue, Offset));
19603 ++i;
19604 ++OutIdx;
19605 }
19606 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19607 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19608 MemOpChains.push_back(
19609 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19610                       MachinePointerInfo::getFixedStack(MF, FI)));
19611      for (const auto &Part : Parts) {
19612 SDValue PartValue = Part.first;
19613 SDValue PartOffset = Part.second;
19614        SDValue Address =
19615            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19616 MemOpChains.push_back(
19617 DAG.getStore(Chain, DL, PartValue, Address,
19618                         MachinePointerInfo::getFixedStack(MF, FI)));
19619      }
19620 ArgValue = SpillSlot;
19621 } else {
19622 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19623 }
19624
19625 // Use local copy if it is a byval arg.
19626 if (Flags.isByVal())
19627 ArgValue = ByValArgs[j++];
19628
19629 if (VA.isRegLoc()) {
19630 // Queue up the argument copies and emit them at the end.
19631 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19632 } else {
19633 assert(VA.isMemLoc() && "Argument not register or memory");
19634 assert(!IsTailCall && "Tail call not allowed if stack is used "
19635 "for passing parameters");
19636
19637 // Work out the address of the stack slot.
19638 if (!StackPtr.getNode())
19639 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19640      SDValue Address =
19641          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19642                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
19643
19644 // Emit the store.
19645 MemOpChains.push_back(
19646 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19647 }
19648 }
19649
19650 // Join the stores, which are independent of one another.
19651 if (!MemOpChains.empty())
19652 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19653
19654 SDValue Glue;
19655
19656 // Build a sequence of copy-to-reg nodes, chained and glued together.
19657 for (auto &Reg : RegsToPass) {
19658 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19659 Glue = Chain.getValue(1);
19660 }
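  // Chaining each copy through the glue value keeps the register copies
  // immediately before the call node, so the argument registers cannot be
  // clobbered by unrelated scheduling.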
19661
19662 // Validate that none of the argument registers have been marked as
19663  // reserved; if so, report an error. Do the same for the return address if
19664  // this is not a tail call.
19665 validateCCReservedRegs(RegsToPass, MF);
19666 if (!IsTailCall &&
19667      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
19668    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19669        MF.getFunction(),
19670 "Return address register required, but has been reserved."});
19671
19672 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19673 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19674 // split it and then direct call can be matched by PseudoCALL.
19675 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19676 const GlobalValue *GV = S->getGlobal();
19677 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19678 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19679 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19680 }
19681
19682 // The first call operand is the chain and the second is the target address.
19683  SmallVector<SDValue, 8> Ops;
19684  Ops.push_back(Chain);
19685 Ops.push_back(Callee);
19686
19687 // Add argument registers to the end of the list so that they are
19688 // known live into the call.
19689 for (auto &Reg : RegsToPass)
19690 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19691
19692 if (!IsTailCall) {
19693 // Add a register mask operand representing the call-preserved registers.
19694 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19695 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19696 assert(Mask && "Missing call preserved mask for calling convention");
19697 Ops.push_back(DAG.getRegisterMask(Mask));
19698 }
19699
19700 // Glue the call to the argument copies, if any.
19701 if (Glue.getNode())
19702 Ops.push_back(Glue);
19703
19704 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19705 "Unexpected CFI type for a direct call");
19706
19707 // Emit the call.
19708 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19709
19710 if (IsTailCall) {
19711    MF.getFrameInfo().setHasTailCall();
19712    SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19713 if (CLI.CFIType)
19714 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19715 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19716 return Ret;
19717 }
19718
19719 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19720 if (CLI.CFIType)
19721 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19722 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19723 Glue = Chain.getValue(1);
19724
19725 // Mark the end of the call, which is glued to the call itself.
19726 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19727 Glue = Chain.getValue(1);
19728
19729 // Assign locations to each value returned by this call.
19730  SmallVector<CCValAssign, 16> RVLocs;
19731  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19732 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19733
19734 // Copy all of the result registers out of their specified physreg.
19735 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19736 auto &VA = RVLocs[i];
19737 // Copy the value out
19738 SDValue RetValue =
19739 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19740 // Glue the RetValue to the end of the call sequence
19741 Chain = RetValue.getValue(1);
19742 Glue = RetValue.getValue(2);
19743
19744 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19745 assert(VA.needsCustom());
19746 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19747 MVT::i32, Glue);
19748 Chain = RetValue2.getValue(1);
19749 Glue = RetValue2.getValue(2);
19750 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19751 RetValue2);
19752 }
19753
19754 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19755
19756 InVals.push_back(RetValue);
19757 }
19758
19759 return Chain;
19760}
19761
19762 bool RISCVTargetLowering::CanLowerReturn(
19763     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19764 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19765  SmallVector<CCValAssign, 16> RVLocs;
19766  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19767
19768 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19769
19770 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19771 MVT VT = Outs[i].VT;
19772 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19773 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19774 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19775 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19776 nullptr, *this, Dispatcher))
19777 return false;
19778 }
19779 return true;
19780}
19781
19782SDValue
19783 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19784                                  bool IsVarArg,
19785                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
19786                                  const SmallVectorImpl<SDValue> &OutVals,
19787 const SDLoc &DL, SelectionDAG &DAG) const {
19788  MachineFunction &MF = DAG.getMachineFunction();
19789  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19790
19791 // Stores the assignment of the return value to a location.
19792  SmallVector<CCValAssign, 16> RVLocs;
19793
19794 // Info about the registers and stack slot.
19795 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19796 *DAG.getContext());
19797
19798 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19799 nullptr, RISCV::CC_RISCV);
19800
19801 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19802 report_fatal_error("GHC functions return void only");
19803
19804 SDValue Glue;
19805 SmallVector<SDValue, 4> RetOps(1, Chain);
19806
19807 // Copy the result values into the output registers.
19808 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19809 SDValue Val = OutVals[OutIdx];
19810 CCValAssign &VA = RVLocs[i];
19811 assert(VA.isRegLoc() && "Can only return in registers!");
19812
19813 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19814 // Handle returning f64 on RV32D with a soft float ABI.
19815 assert(VA.isRegLoc() && "Expected return via registers");
19816 assert(VA.needsCustom());
19817 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19818 DAG.getVTList(MVT::i32, MVT::i32), Val);
19819 SDValue Lo = SplitF64.getValue(0);
19820 SDValue Hi = SplitF64.getValue(1);
19821 Register RegLo = VA.getLocReg();
19822 Register RegHi = RVLocs[++i].getLocReg();
19823
19824 if (STI.isRegisterReservedByUser(RegLo) ||
19825 STI.isRegisterReservedByUser(RegHi))
19826        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19827            MF.getFunction(),
19828 "Return value register required, but has been reserved."});
19829
19830 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19831 Glue = Chain.getValue(1);
19832 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19833 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19834 Glue = Chain.getValue(1);
19835 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19836 } else {
19837 // Handle a 'normal' return.
19838 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19839 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19840
19841 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19842        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19843            MF.getFunction(),
19844 "Return value register required, but has been reserved."});
19845
19846 // Guarantee that all emitted copies are stuck together.
19847 Glue = Chain.getValue(1);
19848 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19849 }
19850 }
19851
19852 RetOps[0] = Chain; // Update chain.
19853
19854 // Add the glue node if we have it.
19855 if (Glue.getNode()) {
19856 RetOps.push_back(Glue);
19857 }
19858
19859 if (any_of(RVLocs,
19860 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19861 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19862
19863 unsigned RetOpc = RISCVISD::RET_GLUE;
19864 // Interrupt service routines use different return instructions.
19865 const Function &Func = DAG.getMachineFunction().getFunction();
19866 if (Func.hasFnAttribute("interrupt")) {
19867 if (!Func.getReturnType()->isVoidTy())
19868      report_fatal_error(
19869          "Functions with the interrupt attribute must have void return type!");
19870
19872 StringRef Kind =
19873 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19874
19875 if (Kind == "supervisor")
19876 RetOpc = RISCVISD::SRET_GLUE;
19877 else
19878 RetOpc = RISCVISD::MRET_GLUE;
19879 }
19880
19881 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19882}
19883
19884void RISCVTargetLowering::validateCCReservedRegs(
19885 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19886 MachineFunction &MF) const {
19887 const Function &F = MF.getFunction();
19888 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19889
19890 if (llvm::any_of(Regs, [&STI](auto Reg) {
19891 return STI.isRegisterReservedByUser(Reg.first);
19892 }))
19893 F.getContext().diagnose(DiagnosticInfoUnsupported{
19894 F, "Argument register required, but has been reserved."});
19895}
19896
19897// Check if the result of the node is only used as a return value, as
19898// otherwise we can't perform a tail-call.
19899 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19900   if (N->getNumValues() != 1)
19901 return false;
19902 if (!N->hasNUsesOfValue(1, 0))
19903 return false;
19904
19905 SDNode *Copy = *N->use_begin();
19906
19907 if (Copy->getOpcode() == ISD::BITCAST) {
19908 return isUsedByReturnOnly(Copy, Chain);
19909 }
19910
19911 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19912 // with soft float ABIs.
19913 if (Copy->getOpcode() != ISD::CopyToReg) {
19914 return false;
19915 }
19916
19917 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19918 // isn't safe to perform a tail call.
19919 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19920 return false;
19921
19922 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19923 bool HasRet = false;
19924 for (SDNode *Node : Copy->uses()) {
19925 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19926 return false;
19927 HasRet = true;
19928 }
19929 if (!HasRet)
19930 return false;
19931
19932 Chain = Copy->getOperand(0);
19933 return true;
19934}
19935
19936 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19937   return CI->isTailCall();
19938}
19939
19940const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19941#define NODE_NAME_CASE(NODE) \
19942 case RISCVISD::NODE: \
19943 return "RISCVISD::" #NODE;
19944 // clang-format off
19945 switch ((RISCVISD::NodeType)Opcode) {
19946  case RISCVISD::FIRST_NUMBER:
19947    break;
19948 NODE_NAME_CASE(RET_GLUE)
19949 NODE_NAME_CASE(SRET_GLUE)
19950 NODE_NAME_CASE(MRET_GLUE)
19951 NODE_NAME_CASE(CALL)
19952 NODE_NAME_CASE(SELECT_CC)
19953 NODE_NAME_CASE(BR_CC)
19954 NODE_NAME_CASE(BuildPairF64)
19955 NODE_NAME_CASE(SplitF64)
19956 NODE_NAME_CASE(TAIL)
19957 NODE_NAME_CASE(ADD_LO)
19958 NODE_NAME_CASE(HI)
19959 NODE_NAME_CASE(LLA)
19960 NODE_NAME_CASE(ADD_TPREL)
19961 NODE_NAME_CASE(MULHSU)
19962 NODE_NAME_CASE(SHL_ADD)
19963 NODE_NAME_CASE(SLLW)
19964 NODE_NAME_CASE(SRAW)
19965 NODE_NAME_CASE(SRLW)
19966 NODE_NAME_CASE(DIVW)
19967 NODE_NAME_CASE(DIVUW)
19968 NODE_NAME_CASE(REMUW)
19969 NODE_NAME_CASE(ROLW)
19970 NODE_NAME_CASE(RORW)
19971 NODE_NAME_CASE(CLZW)
19972 NODE_NAME_CASE(CTZW)
19973 NODE_NAME_CASE(ABSW)
19974 NODE_NAME_CASE(FMV_H_X)
19975 NODE_NAME_CASE(FMV_X_ANYEXTH)
19976 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19977 NODE_NAME_CASE(FMV_W_X_RV64)
19978 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19979 NODE_NAME_CASE(FCVT_X)
19980 NODE_NAME_CASE(FCVT_XU)
19981 NODE_NAME_CASE(FCVT_W_RV64)
19982 NODE_NAME_CASE(FCVT_WU_RV64)
19983 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19984 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19985 NODE_NAME_CASE(FP_ROUND_BF16)
19986 NODE_NAME_CASE(FP_EXTEND_BF16)
19987 NODE_NAME_CASE(FROUND)
19988 NODE_NAME_CASE(FCLASS)
19989 NODE_NAME_CASE(FSGNJX)
19990 NODE_NAME_CASE(FMAX)
19991 NODE_NAME_CASE(FMIN)
19992 NODE_NAME_CASE(READ_COUNTER_WIDE)
19993 NODE_NAME_CASE(BREV8)
19994 NODE_NAME_CASE(ORC_B)
19995 NODE_NAME_CASE(ZIP)
19996 NODE_NAME_CASE(UNZIP)
19997 NODE_NAME_CASE(CLMUL)
19998 NODE_NAME_CASE(CLMULH)
19999 NODE_NAME_CASE(CLMULR)
20000 NODE_NAME_CASE(MOPR)
20001 NODE_NAME_CASE(MOPRR)
20002 NODE_NAME_CASE(SHA256SIG0)
20003 NODE_NAME_CASE(SHA256SIG1)
20004 NODE_NAME_CASE(SHA256SUM0)
20005 NODE_NAME_CASE(SHA256SUM1)
20006 NODE_NAME_CASE(SM4KS)
20007 NODE_NAME_CASE(SM4ED)
20008 NODE_NAME_CASE(SM3P0)
20009 NODE_NAME_CASE(SM3P1)
20010 NODE_NAME_CASE(TH_LWD)
20011 NODE_NAME_CASE(TH_LWUD)
20012 NODE_NAME_CASE(TH_LDD)
20013 NODE_NAME_CASE(TH_SWD)
20014 NODE_NAME_CASE(TH_SDD)
20015 NODE_NAME_CASE(VMV_V_V_VL)
20016 NODE_NAME_CASE(VMV_V_X_VL)
20017 NODE_NAME_CASE(VFMV_V_F_VL)
20018 NODE_NAME_CASE(VMV_X_S)
20019 NODE_NAME_CASE(VMV_S_X_VL)
20020 NODE_NAME_CASE(VFMV_S_F_VL)
20021 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20022 NODE_NAME_CASE(READ_VLENB)
20023 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20024 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20025 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20026 NODE_NAME_CASE(VSLIDEUP_VL)
20027 NODE_NAME_CASE(VSLIDE1UP_VL)
20028 NODE_NAME_CASE(VSLIDEDOWN_VL)
20029 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20030 NODE_NAME_CASE(VFSLIDE1UP_VL)
20031 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20032 NODE_NAME_CASE(VID_VL)
20033 NODE_NAME_CASE(VFNCVT_ROD_VL)
20034 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20035 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20036 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20037 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20038 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20039 NODE_NAME_CASE(VECREDUCE_AND_VL)
20040 NODE_NAME_CASE(VECREDUCE_OR_VL)
20041 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20042 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20043 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20044 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20045 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20046 NODE_NAME_CASE(ADD_VL)
20047 NODE_NAME_CASE(AND_VL)
20048 NODE_NAME_CASE(MUL_VL)
20049 NODE_NAME_CASE(OR_VL)
20050 NODE_NAME_CASE(SDIV_VL)
20051 NODE_NAME_CASE(SHL_VL)
20052 NODE_NAME_CASE(SREM_VL)
20053 NODE_NAME_CASE(SRA_VL)
20054 NODE_NAME_CASE(SRL_VL)
20055 NODE_NAME_CASE(ROTL_VL)
20056 NODE_NAME_CASE(ROTR_VL)
20057 NODE_NAME_CASE(SUB_VL)
20058 NODE_NAME_CASE(UDIV_VL)
20059 NODE_NAME_CASE(UREM_VL)
20060 NODE_NAME_CASE(XOR_VL)
20061 NODE_NAME_CASE(AVGFLOORS_VL)
20062 NODE_NAME_CASE(AVGFLOORU_VL)
20063 NODE_NAME_CASE(AVGCEILS_VL)
20064 NODE_NAME_CASE(AVGCEILU_VL)
20065 NODE_NAME_CASE(SADDSAT_VL)
20066 NODE_NAME_CASE(UADDSAT_VL)
20067 NODE_NAME_CASE(SSUBSAT_VL)
20068 NODE_NAME_CASE(USUBSAT_VL)
20069 NODE_NAME_CASE(FADD_VL)
20070 NODE_NAME_CASE(FSUB_VL)
20071 NODE_NAME_CASE(FMUL_VL)
20072 NODE_NAME_CASE(FDIV_VL)
20073 NODE_NAME_CASE(FNEG_VL)
20074 NODE_NAME_CASE(FABS_VL)
20075 NODE_NAME_CASE(FSQRT_VL)
20076 NODE_NAME_CASE(FCLASS_VL)
20077 NODE_NAME_CASE(VFMADD_VL)
20078 NODE_NAME_CASE(VFNMADD_VL)
20079 NODE_NAME_CASE(VFMSUB_VL)
20080 NODE_NAME_CASE(VFNMSUB_VL)
20081 NODE_NAME_CASE(VFWMADD_VL)
20082 NODE_NAME_CASE(VFWNMADD_VL)
20083 NODE_NAME_CASE(VFWMSUB_VL)
20084 NODE_NAME_CASE(VFWNMSUB_VL)
20085 NODE_NAME_CASE(FCOPYSIGN_VL)
20086 NODE_NAME_CASE(SMIN_VL)
20087 NODE_NAME_CASE(SMAX_VL)
20088 NODE_NAME_CASE(UMIN_VL)
20089 NODE_NAME_CASE(UMAX_VL)
20090 NODE_NAME_CASE(BITREVERSE_VL)
20091 NODE_NAME_CASE(BSWAP_VL)
20092 NODE_NAME_CASE(CTLZ_VL)
20093 NODE_NAME_CASE(CTTZ_VL)
20094 NODE_NAME_CASE(CTPOP_VL)
20095 NODE_NAME_CASE(VFMIN_VL)
20096 NODE_NAME_CASE(VFMAX_VL)
20097 NODE_NAME_CASE(MULHS_VL)
20098 NODE_NAME_CASE(MULHU_VL)
20099 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20100 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20101 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20102 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20103 NODE_NAME_CASE(VFCVT_X_F_VL)
20104 NODE_NAME_CASE(VFCVT_XU_F_VL)
20105 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20106 NODE_NAME_CASE(SINT_TO_FP_VL)
20107 NODE_NAME_CASE(UINT_TO_FP_VL)
20108 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20109 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20110 NODE_NAME_CASE(FP_EXTEND_VL)
20111 NODE_NAME_CASE(FP_ROUND_VL)
20112 NODE_NAME_CASE(STRICT_FADD_VL)
20113 NODE_NAME_CASE(STRICT_FSUB_VL)
20114 NODE_NAME_CASE(STRICT_FMUL_VL)
20115 NODE_NAME_CASE(STRICT_FDIV_VL)
20116 NODE_NAME_CASE(STRICT_FSQRT_VL)
20117 NODE_NAME_CASE(STRICT_VFMADD_VL)
20118 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20119 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20120 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20121 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20122 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20123 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20124 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20125 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20126 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20127 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20128 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20129 NODE_NAME_CASE(STRICT_FSETCC_VL)
20130 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20131 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20132 NODE_NAME_CASE(VWMUL_VL)
20133 NODE_NAME_CASE(VWMULU_VL)
20134 NODE_NAME_CASE(VWMULSU_VL)
20135 NODE_NAME_CASE(VWADD_VL)
20136 NODE_NAME_CASE(VWADDU_VL)
20137 NODE_NAME_CASE(VWSUB_VL)
20138 NODE_NAME_CASE(VWSUBU_VL)
20139 NODE_NAME_CASE(VWADD_W_VL)
20140 NODE_NAME_CASE(VWADDU_W_VL)
20141 NODE_NAME_CASE(VWSUB_W_VL)
20142 NODE_NAME_CASE(VWSUBU_W_VL)
20143 NODE_NAME_CASE(VWSLL_VL)
20144 NODE_NAME_CASE(VFWMUL_VL)
20145 NODE_NAME_CASE(VFWADD_VL)
20146 NODE_NAME_CASE(VFWSUB_VL)
20147 NODE_NAME_CASE(VFWADD_W_VL)
20148 NODE_NAME_CASE(VFWSUB_W_VL)
20149 NODE_NAME_CASE(VWMACC_VL)
20150 NODE_NAME_CASE(VWMACCU_VL)
20151 NODE_NAME_CASE(VWMACCSU_VL)
20152 NODE_NAME_CASE(VNSRL_VL)
20153 NODE_NAME_CASE(SETCC_VL)
20154 NODE_NAME_CASE(VMERGE_VL)
20155 NODE_NAME_CASE(VMAND_VL)
20156 NODE_NAME_CASE(VMOR_VL)
20157 NODE_NAME_CASE(VMXOR_VL)
20158 NODE_NAME_CASE(VMCLR_VL)
20159 NODE_NAME_CASE(VMSET_VL)
20160 NODE_NAME_CASE(VRGATHER_VX_VL)
20161 NODE_NAME_CASE(VRGATHER_VV_VL)
20162 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20163 NODE_NAME_CASE(VSEXT_VL)
20164 NODE_NAME_CASE(VZEXT_VL)
20165 NODE_NAME_CASE(VCPOP_VL)
20166 NODE_NAME_CASE(VFIRST_VL)
20167 NODE_NAME_CASE(READ_CSR)
20168 NODE_NAME_CASE(WRITE_CSR)
20169 NODE_NAME_CASE(SWAP_CSR)
20170 NODE_NAME_CASE(CZERO_EQZ)
20171 NODE_NAME_CASE(CZERO_NEZ)
20172 NODE_NAME_CASE(SW_GUARDED_BRIND)
20173 NODE_NAME_CASE(SF_VC_XV_SE)
20174 NODE_NAME_CASE(SF_VC_IV_SE)
20175 NODE_NAME_CASE(SF_VC_VV_SE)
20176 NODE_NAME_CASE(SF_VC_FV_SE)
20177 NODE_NAME_CASE(SF_VC_XVV_SE)
20178 NODE_NAME_CASE(SF_VC_IVV_SE)
20179 NODE_NAME_CASE(SF_VC_VVV_SE)
20180 NODE_NAME_CASE(SF_VC_FVV_SE)
20181 NODE_NAME_CASE(SF_VC_XVW_SE)
20182 NODE_NAME_CASE(SF_VC_IVW_SE)
20183 NODE_NAME_CASE(SF_VC_VVW_SE)
20184 NODE_NAME_CASE(SF_VC_FVW_SE)
20185 NODE_NAME_CASE(SF_VC_V_X_SE)
20186 NODE_NAME_CASE(SF_VC_V_I_SE)
20187 NODE_NAME_CASE(SF_VC_V_XV_SE)
20188 NODE_NAME_CASE(SF_VC_V_IV_SE)
20189 NODE_NAME_CASE(SF_VC_V_VV_SE)
20190 NODE_NAME_CASE(SF_VC_V_FV_SE)
20191 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20192 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20193 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20194 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20195 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20196 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20197 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20198 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20199 }
20200 // clang-format on
20201 return nullptr;
20202#undef NODE_NAME_CASE
20203}
20204
20205/// getConstraintType - Given a constraint letter, return the type of
20206/// constraint it is for this target.
20207 RISCVTargetLowering::ConstraintType
20208 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20209   if (Constraint.size() == 1) {
20210 switch (Constraint[0]) {
20211 default:
20212 break;
20213 case 'f':
20214 return C_RegisterClass;
20215 case 'I':
20216 case 'J':
20217 case 'K':
20218 return C_Immediate;
20219 case 'A':
20220 return C_Memory;
20221 case 's':
20222 case 'S': // A symbolic address
20223 return C_Other;
20224 }
20225 } else {
20226 if (Constraint == "vr" || Constraint == "vm")
20227 return C_RegisterClass;
20228 }
20229 return TargetLowering::getConstraintType(Constraint);
20230}
20231
20232std::pair<unsigned, const TargetRegisterClass *>
20233 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20234                                                   StringRef Constraint,
20235 MVT VT) const {
20236 // First, see if this is a constraint that directly corresponds to a RISC-V
20237 // register class.
20238 if (Constraint.size() == 1) {
20239 switch (Constraint[0]) {
20240 case 'r':
20241 // TODO: Support fixed vectors up to XLen for P extension?
20242 if (VT.isVector())
20243 break;
20244 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20245 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20246 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20247 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20248 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20249 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20250 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20251 case 'f':
20252 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20253 return std::make_pair(0U, &RISCV::FPR16RegClass);
20254 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20255 return std::make_pair(0U, &RISCV::FPR32RegClass);
20256 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20257 return std::make_pair(0U, &RISCV::FPR64RegClass);
20258 break;
20259 default:
20260 break;
20261 }
20262 } else if (Constraint == "vr") {
20263 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20264 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20265 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20266 return std::make_pair(0U, RC);
20267 }
20268 } else if (Constraint == "vm") {
20269 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20270 return std::make_pair(0U, &RISCV::VMV0RegClass);
20271 }
20272
20273 // Clang will correctly decode the usage of register name aliases into their
20274 // official names. However, other frontends like `rustc` do not. This allows
20275 // users of these frontends to use the ABI names for registers in LLVM-style
20276 // register constraints.
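  // For example, the constraint string "{a0}" resolves to RISCV::X10 below.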
20277 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20278 .Case("{zero}", RISCV::X0)
20279 .Case("{ra}", RISCV::X1)
20280 .Case("{sp}", RISCV::X2)
20281 .Case("{gp}", RISCV::X3)
20282 .Case("{tp}", RISCV::X4)
20283 .Case("{t0}", RISCV::X5)
20284 .Case("{t1}", RISCV::X6)
20285 .Case("{t2}", RISCV::X7)
20286 .Cases("{s0}", "{fp}", RISCV::X8)
20287 .Case("{s1}", RISCV::X9)
20288 .Case("{a0}", RISCV::X10)
20289 .Case("{a1}", RISCV::X11)
20290 .Case("{a2}", RISCV::X12)
20291 .Case("{a3}", RISCV::X13)
20292 .Case("{a4}", RISCV::X14)
20293 .Case("{a5}", RISCV::X15)
20294 .Case("{a6}", RISCV::X16)
20295 .Case("{a7}", RISCV::X17)
20296 .Case("{s2}", RISCV::X18)
20297 .Case("{s3}", RISCV::X19)
20298 .Case("{s4}", RISCV::X20)
20299 .Case("{s5}", RISCV::X21)
20300 .Case("{s6}", RISCV::X22)
20301 .Case("{s7}", RISCV::X23)
20302 .Case("{s8}", RISCV::X24)
20303 .Case("{s9}", RISCV::X25)
20304 .Case("{s10}", RISCV::X26)
20305 .Case("{s11}", RISCV::X27)
20306 .Case("{t3}", RISCV::X28)
20307 .Case("{t4}", RISCV::X29)
20308 .Case("{t5}", RISCV::X30)
20309 .Case("{t6}", RISCV::X31)
20310 .Default(RISCV::NoRegister);
20311 if (XRegFromAlias != RISCV::NoRegister)
20312 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20313
20314 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20315 // TableGen record rather than the AsmName to choose registers for InlineAsm
20316 // constraints, plus we want to match those names to the widest floating point
20317 // register type available, manually select floating point registers here.
20318 //
20319 // The second case is the ABI name of the register, so that frontends can also
20320 // use the ABI names in register constraint lists.
20321 if (Subtarget.hasStdExtF()) {
20322 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20323 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20324 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20325 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20326 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20327 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20328 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20329 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20330 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20331 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20332 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20333 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20334 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20335 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20336 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20337 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20338 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20339 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20340 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20341 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20342 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20343 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20344 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20345 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20346 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20347 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20348 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20349 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20350 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20351 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20352 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20353 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20354 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20355 .Default(RISCV::NoRegister);
20356 if (FReg != RISCV::NoRegister) {
20357 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20358 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20359 unsigned RegNo = FReg - RISCV::F0_F;
20360 unsigned DReg = RISCV::F0_D + RegNo;
20361 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20362 }
20363 if (VT == MVT::f32 || VT == MVT::Other)
20364 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20365 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20366 unsigned RegNo = FReg - RISCV::F0_F;
20367 unsigned HReg = RISCV::F0_H + RegNo;
20368 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20369 }
20370 }
20371 }
20372
20373 if (Subtarget.hasVInstructions()) {
20374 Register VReg = StringSwitch<Register>(Constraint.lower())
20375 .Case("{v0}", RISCV::V0)
20376 .Case("{v1}", RISCV::V1)
20377 .Case("{v2}", RISCV::V2)
20378 .Case("{v3}", RISCV::V3)
20379 .Case("{v4}", RISCV::V4)
20380 .Case("{v5}", RISCV::V5)
20381 .Case("{v6}", RISCV::V6)
20382 .Case("{v7}", RISCV::V7)
20383 .Case("{v8}", RISCV::V8)
20384 .Case("{v9}", RISCV::V9)
20385 .Case("{v10}", RISCV::V10)
20386 .Case("{v11}", RISCV::V11)
20387 .Case("{v12}", RISCV::V12)
20388 .Case("{v13}", RISCV::V13)
20389 .Case("{v14}", RISCV::V14)
20390 .Case("{v15}", RISCV::V15)
20391 .Case("{v16}", RISCV::V16)
20392 .Case("{v17}", RISCV::V17)
20393 .Case("{v18}", RISCV::V18)
20394 .Case("{v19}", RISCV::V19)
20395 .Case("{v20}", RISCV::V20)
20396 .Case("{v21}", RISCV::V21)
20397 .Case("{v22}", RISCV::V22)
20398 .Case("{v23}", RISCV::V23)
20399 .Case("{v24}", RISCV::V24)
20400 .Case("{v25}", RISCV::V25)
20401 .Case("{v26}", RISCV::V26)
20402 .Case("{v27}", RISCV::V27)
20403 .Case("{v28}", RISCV::V28)
20404 .Case("{v29}", RISCV::V29)
20405 .Case("{v30}", RISCV::V30)
20406 .Case("{v31}", RISCV::V31)
20407 .Default(RISCV::NoRegister);
20408 if (VReg != RISCV::NoRegister) {
20409 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20410 return std::make_pair(VReg, &RISCV::VMRegClass);
20411 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20412 return std::make_pair(VReg, &RISCV::VRRegClass);
20413 for (const auto *RC :
20414 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20415 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20416 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20417 return std::make_pair(VReg, RC);
20418 }
20419 }
20420 }
20421 }
20422
20423  std::pair<Register, const TargetRegisterClass *> Res =
20424      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20425
20426 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20427 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20428 // Subtarget into account.
20429 if (Res.second == &RISCV::GPRF16RegClass ||
20430 Res.second == &RISCV::GPRF32RegClass ||
20431 Res.second == &RISCV::GPRPairRegClass)
20432 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20433
20434 return Res;
20435}
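// Illustration (not from the upstream source): a minimal sketch, assuming a C
// frontend such as Clang, of how the constraints handled above appear in
// inline asm:
//
//   int sum;
//   asm("add %0, %1, %2" : "=r"(sum) : "r"(a), "r"(b));   // 'r' -> GPR
//   asm("fadd.s %0, %1, %2" : "=f"(x) : "f"(y), "f"(z));   // 'f' -> FPR32
//   register long a0 asm("a0") = 1;                        // ABI alias; the
//   asm volatile("ecall" : "+r"(a0));                      // {a0} case maps it to X10
//
// The "vr"/"vm" constraints play the same role for RVV operands when the V
// extension is available.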
20436
20437InlineAsm::ConstraintCode
20438RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20439 // Currently only support length 1 constraints.
20440 if (ConstraintCode.size() == 1) {
20441 switch (ConstraintCode[0]) {
20442    case 'A':
20443      return InlineAsm::ConstraintCode::A;
20444 default:
20445 break;
20446 }
20447 }
20448
20449 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20450}
20451
20452void RISCVTargetLowering::LowerAsmOperandForConstraint(
20453 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20454 SelectionDAG &DAG) const {
20455 // Currently only support length 1 constraints.
20456 if (Constraint.size() == 1) {
20457 switch (Constraint[0]) {
20458 case 'I':
20459 // Validate & create a 12-bit signed immediate operand.
20460 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20461 uint64_t CVal = C->getSExtValue();
20462 if (isInt<12>(CVal))
20463 Ops.push_back(
20464 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20465 }
20466 return;
20467 case 'J':
20468 // Validate & create an integer zero operand.
20469 if (isNullConstant(Op))
20470 Ops.push_back(
20471 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20472 return;
20473 case 'K':
20474 // Validate & create a 5-bit unsigned immediate operand.
20475 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20476 uint64_t CVal = C->getZExtValue();
20477 if (isUInt<5>(CVal))
20478 Ops.push_back(
20479 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20480 }
20481 return;
20482    case 'S':
20483      TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
20484 return;
20485 default:
20486 break;
20487 }
20488 }
20489 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20490}
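// Illustration (not from the upstream source): a rough sketch of how the
// immediate constraints above are used and validated:
//
//   asm("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(2047));  // 'I': simm12, ok
//   asm("andi %0, %1, %2" : "=r"(r) : "r"(x), "K"(31));    // 'K': uimm5, ok
//   asm("add  %0, %1, %2" : "=r"(r) : "r"(x), "J"(0));     // 'J': must be 0
//
// An out-of-range constant such as "I"(4096) is rejected later, because no
// target constant is pushed into Ops here.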
20491
20492Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20493 Instruction *Inst,
20494 AtomicOrdering Ord) const {
20495 if (Subtarget.hasStdExtZtso()) {
20496 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20497 return Builder.CreateFence(Ord);
20498 return nullptr;
20499 }
20500
20501 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20502 return Builder.CreateFence(Ord);
20503 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20504 return Builder.CreateFence(AtomicOrdering::Release);
20505 return nullptr;
20506}
20507
20508Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20509 Instruction *Inst,
20510 AtomicOrdering Ord) const {
20511 if (Subtarget.hasStdExtZtso()) {
20512 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20513 return Builder.CreateFence(Ord);
20514 return nullptr;
20515 }
20516
20517 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20518 return Builder.CreateFence(AtomicOrdering::Acquire);
20519 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
20522 return nullptr;
20523}
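// Illustration (not from the upstream source): a summary of the fence
// placement produced by emitLeadingFence/emitTrailingFence on RVWMO (no Ztso):
//   load seq_cst   -> fence rw,rw ; load ; fence r,rw
//   load acquire   ->               load ; fence r,rw
//   store release  -> fence rw,w  ; store
//   store seq_cst  -> fence rw,w  ; store [; fence rw,rw when the trailing
//                                            seq_cst fence is enabled]
// With Ztso, only the seq_cst cases keep a fence; the other orderings rely on
// the total-store-order guarantee.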
20524
20525TargetLowering::AtomicExpansionKind
20526RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20527 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20528 // point operations can't be used in an lr/sc sequence without breaking the
20529 // forward-progress guarantee.
20530  if (AI->isFloatingPointOperation() ||
20531      AI->getOperation() == AtomicRMWInst::UIncWrap ||
20532      AI->getOperation() == AtomicRMWInst::UDecWrap)
20533    return AtomicExpansionKind::CmpXChg;
20534
20535 // Don't expand forced atomics, we want to have __sync libcalls instead.
20536  if (Subtarget.hasForcedAtomics())
20537    return AtomicExpansionKind::None;
20538
20539 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20540 if (AI->getOperation() == AtomicRMWInst::Nand) {
20541 if (Subtarget.hasStdExtZacas() &&
20542        (Size >= 32 || Subtarget.hasStdExtZabha()))
20543      return AtomicExpansionKind::CmpXChg;
20544    if (Size < 32)
20545      return AtomicExpansionKind::MaskedIntrinsic;
20546 }
20547
20548  if (Size < 32 && !Subtarget.hasStdExtZabha())
20549    return AtomicExpansionKind::MaskedIntrinsic;
20550
20551  return AtomicExpansionKind::None;
20552}
20553
20554static Intrinsic::ID
20555getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20556 if (XLen == 32) {
20557 switch (BinOp) {
20558 default:
20559 llvm_unreachable("Unexpected AtomicRMW BinOp");
20561 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20562 case AtomicRMWInst::Add:
20563 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20564 case AtomicRMWInst::Sub:
20565 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20567 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20568 case AtomicRMWInst::Max:
20569 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20570 case AtomicRMWInst::Min:
20571 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20573 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20575 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20576 }
20577 }
20578
20579 if (XLen == 64) {
20580 switch (BinOp) {
20581 default:
20582 llvm_unreachable("Unexpected AtomicRMW BinOp");
20584 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20585 case AtomicRMWInst::Add:
20586 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20587 case AtomicRMWInst::Sub:
20588 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20590 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20591 case AtomicRMWInst::Max:
20592 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20593 case AtomicRMWInst::Min:
20594 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20596 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20598 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20599 }
20600 }
20601
20602 llvm_unreachable("Unexpected XLen\n");
20603}
20604
20605Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20606 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20607 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20608 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20609 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20610 // mask, as this produces better code than the LR/SC loop emitted by
20611 // int_riscv_masked_atomicrmw_xchg.
20612 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20613 isa<ConstantInt>(AI->getValOperand())) {
20614 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20615 if (CVal->isZero())
20616 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20617 Builder.CreateNot(Mask, "Inv_Mask"),
20618 AI->getAlign(), Ord);
20619 if (CVal->isMinusOne())
20620 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20621 AI->getAlign(), Ord);
20622 }
20623
20624 unsigned XLen = Subtarget.getXLen();
20625 Value *Ordering =
20626 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20627 Type *Tys[] = {AlignedAddr->getType()};
20628 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20629      AI->getModule(),
20630      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20631
20632 if (XLen == 64) {
20633 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20634 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20635 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20636 }
20637
20638 Value *Result;
20639
20640 // Must pass the shift amount needed to sign extend the loaded value prior
20641 // to performing a signed comparison for min/max. ShiftAmt is the number of
20642 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20643 // is the number of bits to left+right shift the value in order to
20644 // sign-extend.
20645  if (AI->getOperation() == AtomicRMWInst::Min ||
20646      AI->getOperation() == AtomicRMWInst::Max) {
20647 const DataLayout &DL = AI->getDataLayout();
20648 unsigned ValWidth =
20649 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20650 Value *SextShamt =
20651 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20652 Result = Builder.CreateCall(LrwOpScwLoop,
20653 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20654 } else {
20655 Result =
20656 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20657 }
20658
20659 if (XLen == 64)
20660 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20661 return Result;
20662}
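// Illustration (not from the upstream source): AtomicExpand calls this hook
// after widening a narrow RMW to its containing aligned XLEN word. Roughly,
// for `atomicrmw add ptr %p, i8 1 seq_cst` on RV32:
//   AlignedAddr = %p & ~3
//   ShiftAmt    = (%p & 3) * 8
//   Mask        = 255 << ShiftAmt
//   Incr        = 1 << ShiftAmt
// and the call built above becomes
//   @llvm.riscv.masked.atomicrmw.add.i32(AlignedAddr, Incr, Mask, Ordering)
// with the extra sign-extension shift amount only added for Min/Max.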
20663
20664TargetLowering::AtomicExpansionKind
20665RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20666 AtomicCmpXchgInst *CI) const {
20667 // Don't expand forced atomics, we want to have __sync libcalls instead.
20668  if (Subtarget.hasForcedAtomics())
20669    return AtomicExpansionKind::None;
20670
20671  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20672 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20673      (Size == 8 || Size == 16))
20674    return AtomicExpansionKind::MaskedIntrinsic;
20675  return AtomicExpansionKind::None;
20676}
20677
20678Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20679 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20680 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20681 unsigned XLen = Subtarget.getXLen();
20682 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20683 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20684 if (XLen == 64) {
20685 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20686 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20687 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20688 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20689 }
20690 Type *Tys[] = {AlignedAddr->getType()};
20691 Function *MaskedCmpXchg =
20692 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20693 Value *Result = Builder.CreateCall(
20694 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20695 if (XLen == 64)
20696 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20697 return Result;
20698}
20699
20700bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20701 EVT DataVT) const {
20702 // We have indexed loads for all supported EEW types. Indices are always
20703 // zero extended.
20704 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20705 isTypeLegal(Extend.getValueType()) &&
20706 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20707 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20708}
20709
20710bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20711 EVT VT) const {
20712 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20713 return false;
20714
20715 switch (FPVT.getSimpleVT().SimpleTy) {
20716 case MVT::f16:
20717 return Subtarget.hasStdExtZfhmin();
20718 case MVT::f32:
20719 return Subtarget.hasStdExtF();
20720 case MVT::f64:
20721 return Subtarget.hasStdExtD();
20722 default:
20723 return false;
20724 }
20725}
20726
20727unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20728 // If we are using the small code model, we can reduce size of jump table
20729 // entry to 4 bytes.
20730  if (Subtarget.is64Bit() && !isPositionIndependent() &&
20731      getTargetMachine().getCodeModel() == CodeModel::Small) {
20732    return MachineJumpTableInfo::EK_Custom32;
20733  }
20734  return TargetLowering::getJumpTableEncoding();
20735}
20736
20737const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20738 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20739 unsigned uid, MCContext &Ctx) const {
20740 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20742 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20743}
20744
20745bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20746 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20747 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20748 // a power of two as well.
20749 // FIXME: This doesn't work for zve32, but that's already broken
20750 // elsewhere for the same reason.
20751 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20752 static_assert(RISCV::RVVBitsPerBlock == 64,
20753 "RVVBitsPerBlock changed, audit needed");
20754 return true;
20755}
20756
20757bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20758                                                 SDValue &Offset,
20759                                                 ISD::MemIndexedMode &AM,
20760                                                 SelectionDAG &DAG) const {
20761 // Target does not support indexed loads.
20762 if (!Subtarget.hasVendorXTHeadMemIdx())
20763 return false;
20764
20765 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20766 return false;
20767
20768 Base = Op->getOperand(0);
20769 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20770 int64_t RHSC = RHS->getSExtValue();
20771 if (Op->getOpcode() == ISD::SUB)
20772 RHSC = -(uint64_t)RHSC;
20773
20774 // The constants that can be encoded in the THeadMemIdx instructions
20775 // are of the form (sign_extend(imm5) << imm2).
20776 bool isLegalIndexedOffset = false;
20777 for (unsigned i = 0; i < 4; i++)
20778 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20779 isLegalIndexedOffset = true;
20780 break;
20781 }
20782
20783 if (!isLegalIndexedOffset)
20784 return false;
20785
20786 Offset = Op->getOperand(1);
20787 return true;
20788 }
20789
20790 return false;
20791}
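// Illustration (not from the upstream source): the offset rule above accepts
// exactly the constants of the form sign_extend(imm5) << imm2, imm2 in [0, 3].
// A hypothetical standalone helper mirroring the loop:
//
//   static bool isLegalTHeadMemIdxOffset(int64_t C) {
//     for (unsigned Shift = 0; Shift < 4; ++Shift)
//       if (isInt<5>(C >> Shift) && (C % (1LL << Shift)) == 0)
//         return true;   // representable as simm5 << Shift
//     return false;
//   }
//
// e.g. 96 (= 12 << 3) and -64 (= -8 << 3) are encodable, while 17 is not.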
20792
20793bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20794                                                    SDValue &Offset,
20795                                                    ISD::MemIndexedMode &AM,
20796                                                    SelectionDAG &DAG) const {
20797 EVT VT;
20798 SDValue Ptr;
20799 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20800 VT = LD->getMemoryVT();
20801 Ptr = LD->getBasePtr();
20802 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20803 VT = ST->getMemoryVT();
20804 Ptr = ST->getBasePtr();
20805 } else
20806 return false;
20807
20808 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20809 return false;
20810
20811 AM = ISD::PRE_INC;
20812 return true;
20813}
20814
20815bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
20816                                                     SDValue &Base,
20817                                                     SDValue &Offset,
20818                                                     ISD::MemIndexedMode &AM,
20819                                                     SelectionDAG &DAG) const {
20820 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
20821 if (Op->getOpcode() != ISD::ADD)
20822 return false;
20823
20824 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
20825 Base = LS->getBasePtr();
20826 else
20827 return false;
20828
20829 if (Base == Op->getOperand(0))
20830 Offset = Op->getOperand(1);
20831 else if (Base == Op->getOperand(1))
20832 Offset = Op->getOperand(0);
20833 else
20834 return false;
20835
20836 AM = ISD::POST_INC;
20837 return true;
20838 }
20839
20840 EVT VT;
20841 SDValue Ptr;
20842 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20843 VT = LD->getMemoryVT();
20844 Ptr = LD->getBasePtr();
20845 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20846 VT = ST->getMemoryVT();
20847 Ptr = ST->getBasePtr();
20848 } else
20849 return false;
20850
20851 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20852 return false;
20853 // Post-indexing updates the base, so it's not a valid transform
20854 // if that's not the same as the load's pointer.
20855 if (Ptr != Base)
20856 return false;
20857
20858 AM = ISD::POST_INC;
20859 return true;
20860}
20861
20862bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20863 EVT VT) const {
20864 EVT SVT = VT.getScalarType();
20865
20866 if (!SVT.isSimple())
20867 return false;
20868
20869 switch (SVT.getSimpleVT().SimpleTy) {
20870 case MVT::f16:
20871 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20872 : Subtarget.hasStdExtZfhOrZhinx();
20873 case MVT::f32:
20874 return Subtarget.hasStdExtFOrZfinx();
20875 case MVT::f64:
20876 return Subtarget.hasStdExtDOrZdinx();
20877 default:
20878 break;
20879 }
20880
20881 return false;
20882}
20883
20884ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20885 // Zacas will use amocas.w which does not require extension.
20886 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20887}
20888
20890 const Constant *PersonalityFn) const {
20891 return RISCV::X10;
20892}
20893
20895 const Constant *PersonalityFn) const {
20896 return RISCV::X11;
20897}
20898
20900 // Return false to suppress the unnecessary extensions if the LibCall
20901 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20902 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20903 Type.getSizeInBits() < Subtarget.getXLen()))
20904 return false;
20905
20906 return true;
20907}
20908
20910 if (Subtarget.is64Bit() && Type == MVT::i32)
20911 return true;
20912
20913 return IsSigned;
20914}
20915
20916bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20917 SDValue C) const {
20918 // Check integral scalar types.
20919 if (!VT.isScalarInteger())
20920 return false;
20921
20922  // Omit the optimization if the subtarget has the Zmmul extension and the
20923  // data size exceeds XLen.
20924 const bool HasZmmul = Subtarget.hasStdExtZmmul();
20925 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20926 return false;
20927
20928 auto *ConstNode = cast<ConstantSDNode>(C);
20929 const APInt &Imm = ConstNode->getAPIntValue();
20930
20931 // Break the MUL to a SLLI and an ADD/SUB.
20932 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20933 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20934 return true;
20935
20936 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20937 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20938 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20939 (Imm - 8).isPowerOf2()))
20940 return true;
20941
20942 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20943 // a pair of LUI/ADDI.
20944 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20945 ConstNode->hasOneUse()) {
20946 APInt ImmS = Imm.ashr(Imm.countr_zero());
20947 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20948 (1 - ImmS).isPowerOf2())
20949 return true;
20950 }
20951
20952 return false;
20953}
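// Illustration (not from the upstream source) of constants the checks above
// accept for decomposition, assuming rv64 with Zba and Zmmul:
//   x * 9    -> (x << 3) + x              (Imm - 1 is a power of 2)
//   x * 7    -> (x << 3) - x              (Imm + 1 is a power of 2)
//   x * 4100 -> sh2add(x, x << 12)        (Imm - 4 is a power of 2 and Imm is
//                                          not a simm12)
// A constant such as 0x12345 matches none of the patterns and stays a MUL.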
20954
20955bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20956 SDValue ConstNode) const {
20957 // Let the DAGCombiner decide for vectors.
20958 EVT VT = AddNode.getValueType();
20959 if (VT.isVector())
20960 return true;
20961
20962 // Let the DAGCombiner decide for larger types.
20963 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20964 return true;
20965
20966 // It is worse if c1 is simm12 while c1*c2 is not.
20967 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20968 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20969 const APInt &C1 = C1Node->getAPIntValue();
20970 const APInt &C2 = C2Node->getAPIntValue();
20971 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20972 return false;
20973
20974 // Default to true and let the DAGCombiner decide.
20975 return true;
20976}
20977
20978bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20979 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20980 unsigned *Fast) const {
20981 if (!VT.isVector()) {
20982 if (Fast)
20983 *Fast = Subtarget.enableUnalignedScalarMem();
20984 return Subtarget.enableUnalignedScalarMem();
20985 }
20986
20987 // All vector implementations must support element alignment
20988 EVT ElemVT = VT.getVectorElementType();
20989 if (Alignment >= ElemVT.getStoreSize()) {
20990 if (Fast)
20991 *Fast = 1;
20992 return true;
20993 }
20994
20995 // Note: We lower an unmasked unaligned vector access to an equally sized
20996 // e8 element type access. Given this, we effectively support all unmasked
20997 // misaligned accesses. TODO: Work through the codegen implications of
20998 // allowing such accesses to be formed, and considered fast.
20999 if (Fast)
21000 *Fast = Subtarget.enableUnalignedVectorMem();
21001 return Subtarget.enableUnalignedVectorMem();
21002}
21003
21004
21005EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
21006 const AttributeList &FuncAttributes) const {
21007 if (!Subtarget.hasVInstructions())
21008 return MVT::Other;
21009
21010 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21011 return MVT::Other;
21012
21013 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21014 // has an expansion threshold, and we want the number of hardware memory
21015 // operations to correspond roughly to that threshold. LMUL>1 operations
21016 // are typically expanded linearly internally, and thus correspond to more
21017 // than one actual memory operation. Note that store merging and load
21018 // combining will typically form larger LMUL operations from the LMUL1
21019 // operations emitted here, and that's okay because combining isn't
21020 // introducing new memory operations; it's just merging existing ones.
21021 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21022 if (Op.size() < MinVLenInBytes)
21023 // TODO: Figure out short memops. For the moment, do the default thing
21024 // which ends up using scalar sequences.
21025 return MVT::Other;
21026
21027 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
21028 // fixed vectors.
21029 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
21030 return MVT::Other;
21031
21032 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21033 // a large scalar constant and instead use vmv.v.x/i to do the
21034 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21035 // maximize the chance we can encode the size in the vsetvli.
21036 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21037 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21038
21039 // Do we have sufficient alignment for our preferred VT? If not, revert
21040 // to largest size allowed by our alignment criteria.
21041 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21042 Align RequiredAlign(PreferredVT.getStoreSize());
21043 if (Op.isFixedDstAlign())
21044 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21045 if (Op.isMemcpy())
21046 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21047 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21048 }
21049 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21050}
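// Illustration (not from the upstream source), assuming VLEN = 128 (so
// MinVLenInBytes = 16), ELEN = 64, and sufficient alignment:
//   - memcpy of 16+ bytes        -> v2i64 (one whole LMUL1 register per op)
//   - memset to a non-zero value -> v16i8 (so vmv.v.x can splat the byte)
//   - anything under 16 bytes    -> MVT::Other (falls back to scalar code)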
21051
21052bool RISCVTargetLowering::splitValueIntoRegisterParts(
21053 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21054 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21055 bool IsABIRegCopy = CC.has_value();
21056 EVT ValueVT = Val.getValueType();
21057 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21058 PartVT == MVT::f32) {
21059 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21060 // nan, and cast to f32.
21061 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21062 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21063 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21064 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21065 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
21066 Parts[0] = Val;
21067 return true;
21068 }
21069
21070 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21071 LLVMContext &Context = *DAG.getContext();
21072 EVT ValueEltVT = ValueVT.getVectorElementType();
21073 EVT PartEltVT = PartVT.getVectorElementType();
21074 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21075 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21076 if (PartVTBitSize % ValueVTBitSize == 0) {
21077 assert(PartVTBitSize >= ValueVTBitSize);
21078 // If the element types are different, bitcast to the same element type of
21079 // PartVT first.
21080      // For example, to copy a <vscale x 1 x i8> value into a
21081      // <vscale x 4 x i16> part,
21082      // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an
21083      // insert_subvector, and can then bitcast it to <vscale x 4 x i16>.
21084 if (ValueEltVT != PartEltVT) {
21085 if (PartVTBitSize > ValueVTBitSize) {
21086 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21087 assert(Count != 0 && "The number of element should not be zero.");
21088 EVT SameEltTypeVT =
21089 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21090 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21091 DAG.getUNDEF(SameEltTypeVT), Val,
21092 DAG.getVectorIdxConstant(0, DL));
21093 }
21094 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21095 } else {
21096 Val =
21097 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21098 Val, DAG.getVectorIdxConstant(0, DL));
21099 }
21100 Parts[0] = Val;
21101 return true;
21102 }
21103 }
21104 return false;
21105}
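// Illustration (not from the upstream source): the f16/bf16 case above
// implements NaN-boxing into an FPR32 argument register. For example, the
// half value 1.0 (bits 0x3C00) is passed as the f32 bit pattern
// 0xFFFF0000 | 0x3C00 = 0xFFFF3C00, and joinRegisterPartsIntoValue below
// simply truncates the low 16 bits back out.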
21106
21107SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
21108 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21109 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21110 bool IsABIRegCopy = CC.has_value();
21111 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21112 PartVT == MVT::f32) {
21113 SDValue Val = Parts[0];
21114
21115 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21116 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21117 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21118 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21119 return Val;
21120 }
21121
21122 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21123 LLVMContext &Context = *DAG.getContext();
21124 SDValue Val = Parts[0];
21125 EVT ValueEltVT = ValueVT.getVectorElementType();
21126 EVT PartEltVT = PartVT.getVectorElementType();
21127 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21128 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21129 if (PartVTBitSize % ValueVTBitSize == 0) {
21130 assert(PartVTBitSize >= ValueVTBitSize);
21131 EVT SameEltTypeVT = ValueVT;
21132 // If the element types are different, convert it to the same element type
21133 // of PartVT.
21134      // For example, to copy a <vscale x 1 x i8> value out of a
21135      // <vscale x 4 x i16> part,
21136      // we first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>,
21137      // and can then extract the <vscale x 1 x i8> subvector.
21138 if (ValueEltVT != PartEltVT) {
21139 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21140 assert(Count != 0 && "The number of element should not be zero.");
21141 SameEltTypeVT =
21142 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21143 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21144 }
21145 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21146 DAG.getVectorIdxConstant(0, DL));
21147 return Val;
21148 }
21149 }
21150 return SDValue();
21151}
21152
21153bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
21154 // When aggressively optimizing for code size, we prefer to use a div
21155 // instruction, as it is usually smaller than the alternative sequence.
21156 // TODO: Add vector division?
21157 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21158 return OptSize && !VT.isVector();
21159}
21160
21161bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
21162  // Scalarizing zero_ext and sign_ext might prevent them from matching widening
21163  // instructions in some situations.
21164 unsigned Opc = N->getOpcode();
21165 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21166 return false;
21167 return true;
21168}
21169
21170static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21171 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21172 Function *ThreadPointerFunc =
21173 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21174 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21175 IRB.CreateCall(ThreadPointerFunc), Offset);
21176}
21177
21178Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
21179 // Fuchsia provides a fixed TLS slot for the stack cookie.
21180 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21181 if (Subtarget.isTargetFuchsia())
21182 return useTpOffset(IRB, -0x10);
21183
21184 // Android provides a fixed TLS slot for the stack cookie. See the definition
21185 // of TLS_SLOT_STACK_GUARD in
21186 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21187 if (Subtarget.isTargetAndroid())
21188 return useTpOffset(IRB, -0x18);
21189
21190  return TargetLowering::getIRStackGuard(IRB);
21191}
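// Illustration (not from the upstream source): with these fixed offsets the
// stack-protector guard load becomes a plain tp-relative access, roughly
// `ld reg, -24(tp)` on rv64 Android and `ld reg, -16(tp)` on Fuchsia, instead
// of going through the __stack_chk_guard global.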
21192
21193bool RISCVTargetLowering::isLegalInterleavedAccessType(
21194 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21195 const DataLayout &DL) const {
21196 EVT VT = getValueType(DL, VTy);
21197 // Don't lower vlseg/vsseg for vector types that can't be split.
21198 if (!isTypeLegal(VT))
21199 return false;
21200
21202 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21203 Alignment))
21204 return false;
21205
21206 MVT ContainerVT = VT.getSimpleVT();
21207
21208 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21209 if (!Subtarget.useRVVForFixedLengthVectors())
21210 return false;
21211 // Sometimes the interleaved access pass picks up splats as interleaves of
21212 // one element. Don't lower these.
21213 if (FVTy->getNumElements() < 2)
21214 return false;
21215
21217 } else {
21218 // The intrinsics for scalable vectors are not overloaded on pointer type
21219 // and can only handle the default address space.
21220 if (AddrSpace)
21221 return false;
21222 }
21223
21224 // Need to make sure that EMUL * NFIELDS ≤ 8
21225 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21226 if (Fractional)
21227 return true;
21228 return Factor * LMUL <= 8;
21229}
21230
21231bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
21230
21232 Align Alignment) const {
21233 if (!Subtarget.hasVInstructions())
21234 return false;
21235
21236 // Only support fixed vectors if we know the minimum vector size.
21237 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21238 return false;
21239
21240 EVT ScalarType = DataType.getScalarType();
21241 if (!isLegalElementTypeForRVV(ScalarType))
21242 return false;
21243
21244 if (!Subtarget.enableUnalignedVectorMem() &&
21245 Alignment < ScalarType.getStoreSize())
21246 return false;
21247
21248 return true;
21249}
21250
21251static const Intrinsic::ID FixedVlsegIntrIds[] = {
21252 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21253 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21254 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21255 Intrinsic::riscv_seg8_load};
21256
21257/// Lower an interleaved load into a vlsegN intrinsic.
21258///
21259/// E.g. Lower an interleaved load (Factor = 2):
21260/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21261/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21262/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21263///
21264/// Into:
21265/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21266/// %ptr, i64 4)
21267/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21268/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21269bool RISCVTargetLowering::lowerInterleavedLoad(
21270    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21271 ArrayRef<unsigned> Indices, unsigned Factor) const {
21272 IRBuilder<> Builder(LI);
21273
21274 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21275 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21277 LI->getDataLayout()))
21278 return false;
21279
21280 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21281
21282 Function *VlsegNFunc =
21284 {VTy, LI->getPointerOperandType(), XLenTy});
21285
21286 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21287
21288 CallInst *VlsegN =
21289 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21290
21291 for (unsigned i = 0; i < Shuffles.size(); i++) {
21292 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21293 Shuffles[i]->replaceAllUsesWith(SubVec);
21294 }
21295
21296 return true;
21297}
21298
21299static const Intrinsic::ID FixedVssegIntrIds[] = {
21300 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21301 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21302 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21303 Intrinsic::riscv_seg8_store};
21304
21305/// Lower an interleaved store into a vssegN intrinsic.
21306///
21307/// E.g. Lower an interleaved store (Factor = 3):
21308/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21309/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21310/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21311///
21312/// Into:
21313/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21314/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21315/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21316/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21317/// %ptr, i32 4)
21318///
21319/// Note that the new shufflevectors will be removed and we'll only generate one
21320/// vsseg3 instruction in CodeGen.
21321bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
21322 ShuffleVectorInst *SVI,
21323 unsigned Factor) const {
21324 IRBuilder<> Builder(SI);
21325 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21326 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21327 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21328 ShuffleVTy->getNumElements() / Factor);
21329 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21330 SI->getPointerAddressSpace(),
21331 SI->getDataLayout()))
21332 return false;
21333
21334 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21335
21336 Function *VssegNFunc =
21337 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21338 {VTy, SI->getPointerOperandType(), XLenTy});
21339
21340 auto Mask = SVI->getShuffleMask();
21342
21343 for (unsigned i = 0; i < Factor; i++) {
21344 Value *Shuffle = Builder.CreateShuffleVector(
21345 SVI->getOperand(0), SVI->getOperand(1),
21346 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21347 Ops.push_back(Shuffle);
21348 }
21349 // This VL should be OK (should be executable in one vsseg instruction,
21350 // potentially under larger LMULs) because we checked that the fixed vector
21351 // type fits in isLegalInterleavedAccessType
21352 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21353 Ops.append({SI->getPointerOperand(), VL});
21354
21355 Builder.CreateCall(VssegNFunc, Ops);
21356
21357 return true;
21358}
21359
21360bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
21361 IntrinsicInst *DI, LoadInst *LI,
21362 SmallVectorImpl<Instruction *> &DeadInsts) const {
21363 assert(LI->isSimple());
21364 IRBuilder<> Builder(LI);
21365
21366 // Only deinterleave2 supported at present.
21367 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21368 return false;
21369
21370 unsigned Factor = 2;
21371
21372 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21373 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21374
21375 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21377 LI->getDataLayout()))
21378 return false;
21379
21380 Function *VlsegNFunc;
21381 Value *VL;
21382 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21384
21385 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21386 VlsegNFunc = Intrinsic::getDeclaration(
21387 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21388 {ResVTy, LI->getPointerOperandType(), XLenTy});
21389 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21390 } else {
21391 static const Intrinsic::ID IntrIds[] = {
21392 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21393 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21394 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21395 Intrinsic::riscv_vlseg8};
21396
21397 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21398 {ResVTy, XLenTy});
21399 VL = Constant::getAllOnesValue(XLenTy);
21400 Ops.append(Factor, PoisonValue::get(ResVTy));
21401 }
21402
21403 Ops.append({LI->getPointerOperand(), VL});
21404
21405 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21406 DI->replaceAllUsesWith(Vlseg);
21407
21408 return true;
21409}
21410
21411bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
21412    IntrinsicInst *II, StoreInst *SI,
21413 SmallVectorImpl<Instruction *> &DeadInsts) const {
21414 assert(SI->isSimple());
21415 IRBuilder<> Builder(SI);
21416
21417 // Only interleave2 supported at present.
21418 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21419 return false;
21420
21421 unsigned Factor = 2;
21422
21423 VectorType *VTy = cast<VectorType>(II->getType());
21424 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21425
21426 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21427 SI->getPointerAddressSpace(),
21428 SI->getDataLayout()))
21429 return false;
21430
21431 Function *VssegNFunc;
21432 Value *VL;
21433 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21434
21435 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21436 VssegNFunc = Intrinsic::getDeclaration(
21437 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21438 {InVTy, SI->getPointerOperandType(), XLenTy});
21439 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21440 } else {
21441 static const Intrinsic::ID IntrIds[] = {
21442 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21443 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21444 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21445 Intrinsic::riscv_vsseg8};
21446
21447 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21448 {InVTy, XLenTy});
21449 VL = Constant::getAllOnesValue(XLenTy);
21450 }
21451
21452 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21453 SI->getPointerOperand(), VL});
21454
21455 return true;
21456}
21457
21461 const TargetInstrInfo *TII) const {
21462 assert(MBBI->isCall() && MBBI->getCFIType() &&
21463 "Invalid call instruction for a KCFI check");
21464 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21465 MBBI->getOpcode()));
21466
21467 MachineOperand &Target = MBBI->getOperand(0);
21468 Target.setIsRenamable(false);
21469
21470 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21471 .addReg(Target.getReg())
21472 .addImm(MBBI->getCFIType())
21473 .getInstr();
21474}
21475
21476#define GET_REGISTER_MATCHER
21477#include "RISCVGenAsmMatcher.inc"
21478
21481 const MachineFunction &MF) const {
21483 if (Reg == RISCV::NoRegister)
21485 if (Reg == RISCV::NoRegister)
21487 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21488 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21489 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21490 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21491 StringRef(RegName) + "\"."));
21492 return Reg;
21493}
21494
21495MachineMemOperand::Flags
21496RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21497 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21498
21499  if (NontemporalInfo == nullptr)
21500    return MachineMemOperand::MONone;
21501
21502  // 1 -> __RISCV_NTLH_ALL (the default value)
21503 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21504 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21505 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21506 // 5 -> __RISCV_NTLH_ALL
21507 int NontemporalLevel = 5;
21508 const MDNode *RISCVNontemporalInfo =
21509 I.getMetadata("riscv-nontemporal-domain");
21510 if (RISCVNontemporalInfo != nullptr)
21511 NontemporalLevel =
21512 cast<ConstantInt>(
21513 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21514 ->getValue())
21515 ->getZExtValue();
21516
21517 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21518 "RISC-V target doesn't support this non-temporal domain.");
21519
21520 NontemporalLevel -= 2;
21522 if (NontemporalLevel & 0b1)
21523 Flags |= MONontemporalBit0;
21524 if (NontemporalLevel & 0b10)
21525 Flags |= MONontemporalBit1;
21526
21527 return Flags;
21528}
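// Illustration (not from the upstream source): the level-to-bits mapping
// computed above is
//   level 2 (__RISCV_NTLH_INNERMOST_PRIVATE) -> neither bit set
//   level 3 (__RISCV_NTLH_ALL_PRIVATE)       -> MONontemporalBit0
//   level 4 (__RISCV_NTLH_INNERMOST_SHARED)  -> MONontemporalBit1
//   level 5 (__RISCV_NTLH_ALL, the default)  -> both bits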
21529
21532
21533 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21535 TargetFlags |= (NodeFlags & MONontemporalBit0);
21536 TargetFlags |= (NodeFlags & MONontemporalBit1);
21537 return TargetFlags;
21538}
21539
21541 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21542 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21543}
21544
21545bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21546 if (VT.isScalableVector())
21547 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21548 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21549 return true;
21550 return Subtarget.hasStdExtZbb() &&
21551 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21552}
21553
21555 ISD::CondCode Cond) const {
21556 return isCtpopFast(VT) ? 0 : 1;
21557}
21558
21560
21561 // GISel support is in progress or complete for these opcodes.
21562 unsigned Op = Inst.getOpcode();
21563 if (Op == Instruction::Add || Op == Instruction::Sub ||
21564 Op == Instruction::And || Op == Instruction::Or ||
21565 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21566 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
21567 Op == Instruction::Freeze || Op == Instruction::Store)
21568 return false;
21569
21570 if (Inst.getType()->isScalableTy())
21571 return true;
21572
21573 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21574 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21575 !isa<ReturnInst>(&Inst))
21576 return true;
21577
21578 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21579 if (AI->getAllocatedType()->isScalableTy())
21580 return true;
21581 }
21582
21583 return false;
21584}
21585
21586SDValue
21587RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21588 SelectionDAG &DAG,
21589 SmallVectorImpl<SDNode *> &Created) const {
21591 if (isIntDivCheap(N->getValueType(0), Attr))
21592 return SDValue(N, 0); // Lower SDIV as SDIV
21593
21594 // Only perform this transform if short forward branch opt is supported.
21595 if (!Subtarget.hasShortForwardBranchOpt())
21596 return SDValue();
21597 EVT VT = N->getValueType(0);
21598 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21599 return SDValue();
21600
21601 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21602 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21603 return SDValue();
21604 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21605}
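// Illustration (not from the upstream source): with short forward branches,
// `x / 8` is emitted roughly as
//   temp = x; if (x < 0) temp = x + 7;   // conditionally add 2**k - 1
//   result = temp >> 3;                  // arithmetic shift right by k
// The 2**k - 1 < 2048 check above keeps that conditional add encodable as a
// single addi/addiw immediate.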
21606
21607bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21608 EVT VT, const APInt &AndMask) const {
21609 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21610 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21612}
21613
21614unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21615 return Subtarget.getMinimumJumpTableEntries();
21616}
21617
21618// Handle single arg such as return value.
21619template <typename Arg>
21620void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21621  // This lambda determines whether an array of types consists of homogeneous
21622  // scalable vector types.
21623 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21624 // First, extract the first element in the argument type.
21625 auto It = ArgList.begin();
21626 MVT FirstArgRegType = It->VT;
21627
21628    // Return false if the list is empty or the type needs to be split.
21629 if (It == ArgList.end() || It->Flags.isSplit())
21630 return false;
21631
21632 ++It;
21633
21634 // Return if this argument type contains only 1 element, or it's not a
21635 // vector type.
21636 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21637 return false;
21638
21639 // Second, check if the following elements in this argument type are all the
21640 // same.
21641 for (; It != ArgList.end(); ++It)
21642 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21643 return false;
21644
21645 return true;
21646 };
21647
21648 if (isHomogeneousScalableVectorType(ArgList)) {
21649 // Handle as tuple type
21650 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21651 } else {
21652 // Handle as normal vector type
21653 bool FirstVMaskAssigned = false;
21654 for (const auto &OutArg : ArgList) {
21655 MVT RegisterVT = OutArg.VT;
21656
21657 // Skip non-RVV register type
21658 if (!RegisterVT.isVector())
21659 continue;
21660
21661 if (RegisterVT.isFixedLengthVector())
21662 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21663
21664 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21665 RVVArgInfos.push_back({1, RegisterVT, true});
21666 FirstVMaskAssigned = true;
21667 continue;
21668 }
21669
21670 RVVArgInfos.push_back({1, RegisterVT, false});
21671 }
21672 }
21673}
21674
21675// Handle multiple args.
21676template <>
21677void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21678 const DataLayout &DL = MF->getDataLayout();
21679 const Function &F = MF->getFunction();
21680 LLVMContext &Context = F.getContext();
21681
21682 bool FirstVMaskAssigned = false;
21683 for (Type *Ty : TypeList) {
21684 StructType *STy = dyn_cast<StructType>(Ty);
21685 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21686 Type *ElemTy = STy->getTypeAtIndex(0U);
21687 EVT VT = TLI->getValueType(DL, ElemTy);
21688 MVT RegisterVT =
21689 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21690 unsigned NumRegs =
21691 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21692
21693 RVVArgInfos.push_back(
21694 {NumRegs * STy->getNumElements(), RegisterVT, false});
21695 } else {
21696 SmallVector<EVT, 4> ValueVTs;
21697 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21698
21699 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21700 ++Value) {
21701 EVT VT = ValueVTs[Value];
21702 MVT RegisterVT =
21703 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21704 unsigned NumRegs =
21705 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21706
21707 // Skip non-RVV register type
21708 if (!RegisterVT.isVector())
21709 continue;
21710
21711 if (RegisterVT.isFixedLengthVector())
21712 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21713
21714 if (!FirstVMaskAssigned &&
21715 RegisterVT.getVectorElementType() == MVT::i1) {
21716 RVVArgInfos.push_back({1, RegisterVT, true});
21717 FirstVMaskAssigned = true;
21718 --NumRegs;
21719 }
21720
21721 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21722 }
21723 }
21724 }
21725}
21726
21727void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21728 unsigned StartReg) {
21729 assert((StartReg % LMul) == 0 &&
21730 "Start register number should be multiple of lmul");
21731 const MCPhysReg *VRArrays;
21732 switch (LMul) {
21733 default:
21734 report_fatal_error("Invalid lmul");
21735 case 1:
21736 VRArrays = ArgVRs;
21737 break;
21738 case 2:
21739 VRArrays = ArgVRM2s;
21740 break;
21741 case 4:
21742 VRArrays = ArgVRM4s;
21743 break;
21744 case 8:
21745 VRArrays = ArgVRM8s;
21746 break;
21747 }
21748
21749 for (unsigned i = 0; i < NF; ++i)
21750 if (StartReg)
21751 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21752 else
21753 AllocatedPhysRegs.push_back(MCPhysReg());
21754}
21755
21756/// This function determines if each RVV argument is passed by register, if the
21757/// argument can be assigned to a VR, then give it a specific register.
21758/// Otherwise, assign the argument to 0, which is an invalid MCPhysReg.
21759void RVVArgDispatcher::compute() {
21760 uint32_t AssignedMap = 0;
21761 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21762 // Allocate first vector mask argument to V0.
21763 if (ArgInfo.FirstVMask) {
21764 AllocatedPhysRegs.push_back(RISCV::V0);
21765 return;
21766 }
21767
21768 unsigned RegsNeeded = divideCeil(
21769 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21770 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21771 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21772 StartReg += RegsNeeded) {
21773 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21774 if ((AssignedMap & Map) == 0) {
21775 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21776 AssignedMap |= Map;
21777 return;
21778 }
21779 }
21780
21781 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21782 };
21783
21784 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21785 allocate(RVVArgInfos[i]);
21786}
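// Illustration (not from the upstream source) of the first-fit allocation
// above, assuming RVVBitsPerBlock = 64 so a <vscale x 2 x i32> needs one
// register:
//   args: (<vscale x 2 x i1> mask, <vscale x 2 x i32> a, <vscale x 8 x i32> b)
//   - the mask is the first vector mask argument      -> v0
//   - a needs 1 register, first free slot             -> v8
//   - b needs 4 registers at a multiple-of-4 slot     -> v12-v15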
21787
21788MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21789 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21790 return AllocatedPhysRegs[CurIdx++];
21791}
21792
21795 int JTI,
21796 SelectionDAG &DAG) const {
21797 if (Subtarget.hasStdExtZicfilp()) {
21798 // When Zicfilp enabled, we need to use software guarded branch for jump
21799 // table branch.
21800 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
21801 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
21802 Addr);
21803 }
21804 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
21805}
21806
21807namespace llvm::RISCVVIntrinsicsTable {
21808
21809#define GET_RISCVVIntrinsicsTable_IMPL
21810#include "RISCVGenSearchableTables.inc"
21811
21812} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
const char LLVMTargetMachineRef TM
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target-specific op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target-specific op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target-specific VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
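The declaration above has no brief attached; as a hedged illustration of what a generalized-reverse / OR-combine evaluation on a 64-bit constant can look like (a standalone sketch using standard butterfly-stage masks, not code copied from this file):

  #include <cstdint>

  // Sketch: for every bit set in ShAmt, swap adjacent blocks of that width;
  // when IsGORC is true, OR the swapped value into the original instead.
  static uint64_t grevOrGorcSketch(uint64_t X, unsigned ShAmt, bool IsGORC) {
    static const uint64_t Masks[] = {
        0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
        0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
    for (unsigned Stage = 0; Stage != 6; ++Stage) {
      unsigned Shift = 1u << Stage;
      if (ShAmt & Shift) {
        uint64_t Swapped =
            ((X & Masks[Stage]) << Shift) | ((X >> Shift) & Masks[Stage]);
        X = IsGORC ? (X | Swapped) : Swapped;
      }
    }
    return X;
  }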
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
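A plausible, hedged sketch of such a helper (assuming the LLT factory functions declared in llvm/CodeGenTypes/LowLevelType.h; not necessarily the implementation in this file): keep the element count and switch the scalar type to i1.

  #include "llvm/CodeGenTypes/LowLevelType.h"

  // Hypothetical helper: i1 for scalars, <N x i1> / <vscale x N x i1> for vectors.
  static llvm::LLT maskTypeForSketch(llvm::LLT VecTy) {
    llvm::LLT MaskTy = llvm::LLT::scalar(1);
    if (VecTy.isVector())
      MaskTy = llvm::LLT::vector(VecTy.getElementCount(), MaskTy);
    return MaskTy;
  }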
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1249
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1241
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1021
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1366
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1181
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:309
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1614
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1377
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:415
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1491
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1706
bool isMask(unsigned numBits) const
Definition: APInt.h:468
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:314
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1110
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1369
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:266
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
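Many of the APInt helpers listed above appear in the constant analysis done by this lowering (immediate legality, splat detection, and so on). A minimal standalone usage sketch, not code from this file:

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  static void apintSketch() {
    // A simm12-style range check on a 64-bit constant.
    llvm::APInt Imm(64, 0x7ff);
    assert(Imm.isSignedIntN(12) && "2047 fits in a 12-bit signed immediate");

    // Common bit-pattern factories.
    llvm::APInt Low8 = llvm::APInt::getLowBitsSet(64, 8);   // 0x00000000000000ff
    llvm::APInt High8 = llvm::APInt::getHighBitsSet(64, 8); // 0xff00000000000000
    llvm::APInt Bit5 = llvm::APInt::getOneBitSet(64, 5);    // 0x0000000000000020
    assert(Low8.isMask(8) && !High8.isNonNegative() && Bit5.isPowerOf2());

    // Sign-extend a narrow value and read it back as a host integer.
    llvm::APInt Narrow(8, 0x80);
    assert(Narrow.sext(64).getSExtValue() == -128);
  }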
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:61
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:809
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:726
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:724
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:730
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:728
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
bool isFloatingPointOperation() const
Definition: Instructions.h:864
BinOp getOperation() const
Definition: Instructions.h:787
Value * getValOperand()
Definition: Instructions.h:856
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:829
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
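CCState and CCValAssign drive the calling-convention analysis in LowerFormalArguments/LowerCall. A simplified, hedged sketch of the usual pattern; CC_Hypothetical and the register-class argument are placeholders, and the stack-argument path is elided:

  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include <cassert>

  using namespace llvm;

  static void analyzeFormalArgsSketch(SelectionDAG &DAG, SDValue Chain,
                                      const SDLoc &DL, CallingConv::ID CallConv,
                                      bool IsVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      CCAssignFn CC_Hypothetical,
                                      const TargetRegisterClass *RC,
                                      SmallVectorImpl<SDValue> &InVals) {
    MachineFunction &MF = DAG.getMachineFunction();
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hypothetical);

    for (const CCValAssign &VA : ArgLocs) {
      if (VA.isRegLoc()) {
        // Register argument: mark the physreg live-in and copy it out.
        Register VReg = MF.addLiveIn(VA.getLocReg(), RC);
        InVals.push_back(DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()));
      } else {
        assert(VA.isMemLoc() && "expected a register or memory location");
        // Stack argument at VA.getLocMemOffset(); a real lowering would create
        // a fixed frame object here and load from it.
      }
    }
  }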
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:206
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:109
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:386
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:881
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
iterator_range< arg_iterator > args()
Definition: Function.h:890
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:281
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
Argument * getArg(unsigned i) const
Definition: Function.h:884
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1884
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2524
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1839
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2041
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1754
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1349
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2502
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1859
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2015
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2420
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2674
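These IRBuilderBase entries are the ones the atomic-lowering hooks (emitLeadingFence, emitMaskedAtomicRMWIntrinsic, and friends) build on. A minimal, hedged sketch of emitting a fence followed by an atomicrmw with this API; a standalone example, not code from this file:

  #include "llvm/IR/IRBuilder.h"

  using namespace llvm;

  // Build: fence release; then %old = atomicrmw add ptr %Ptr, i32 %Val monotonic
  static Value *emitFencedAtomicAddSketch(IRBuilderBase &Builder, Value *Ptr,
                                          Value *Val) {
    Builder.CreateFence(AtomicOrdering::Release);
    return Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val, MaybeAlign(4),
                                   AtomicOrdering::Monotonic);
  }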
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
Value * getPointerOperand()
Definition: Instructions.h:253
bool isSimple() const
Definition: Instructions.h:245
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
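The MVT queries above do the type arithmetic behind container-type selection for RVV. A small, hedged sketch of that kind of manipulation; the concrete types are illustrative, not the policy used by this file:

  #include "llvm/CodeGenTypes/MachineValueType.h"
  #include <cassert>

  using namespace llvm;

  static void mvtSketch() {
    MVT Fixed = MVT::getVectorVT(MVT::i32, 8);            // v8i32
    MVT Scalable = MVT::getScalableVectorVT(MVT::i32, 2); // nxv2i32
    assert(Fixed.isFixedLengthVector() && Scalable.isScalableVector());
    assert(Fixed.getVectorElementType() == MVT::i32 && Fixed.isPow2VectorType());
    assert(Fixed.getSizeInBits().getFixedValue() == 256);

    // Halve the element count, or swap FP elements for same-width integers.
    MVT Half = Fixed.getHalfNumVectorElementsVT();          // v4i32
    MVT AsInt = MVT::getVectorVT(MVT::f32, 8)
                    .changeVectorElementTypeToInteger();    // v8i32
    assert(Half.getVectorNumElements() == 4 && AsInt == Fixed);
  }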
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
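The MachineInstrBuilder operand helpers above are normally reached through BuildMI, as in the custom-inserter and pseudo-expansion code. A hedged sketch of the idiom with a placeholder opcode and caller-supplied registers (not an instruction sequence from this file):

  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"

  using namespace llvm;

  // Emit 'DstReg = <SomeOpcode> SrcReg, 42' before position I in MBB.
  static void buildMISketch(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, const DebugLoc &DL,
                            const TargetInstrInfo &TII, unsigned SomeOpcode,
                            Register DstReg, Register SrcReg) {
    BuildMI(MBB, I, DL, TII.get(SomeOpcode), DstReg)
        .addReg(SrcReg)
        .addImm(42);
  }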
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:403
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y --> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining an 'and' with a mask constant followed by a compare against zero into a single test-style instruction.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if this node is an UNDEF node.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
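A minimal, hypothetical helper (the function name and i1 result type are illustrative, not taken from this file) showing how getSetCC takes an ISD::CondCode directly, so callers do not have to build the condition operand themselves:
#include "llvm/CodeGen/SelectionDAG.h"
// Builds (LHS setlt RHS) as an i1 SETCC node.
static llvm::SDValue buildSignedLessThan(llvm::SelectionDAG &DAG,
                                         const llvm::SDLoc &DL,
                                         llvm::SDValue LHS, llvm::SDValue RHS) {
  return DAG.getSetCC(DL, llvm::MVT::i1, LHS, RHS, llvm::ISD::SETLT);
}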
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
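A short sketch of getNOT (the helper name is hypothetical); it materializes the all-ones constant and emits the XOR in the value's own type:
#include "llvm/CodeGen/SelectionDAG.h"
// Equivalent to (XOR Val, -1) in Val's type.
static llvm::SDValue invertAllBits(llvm::SelectionDAG &DAG,
                                   const llvm::SDLoc &DL, llvm::SDValue Val) {
  return DAG.getNOT(DL, Val, Val.getValueType());
}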
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:495
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:392
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:844
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:489
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:677
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
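A hypothetical sketch combining GetSplitDestVTs with SplitVector (the wrapper name is an assumption): the first call computes the low/high value types, the second performs the EXTRACT_SUBVECTOR split:
#include "llvm/CodeGen/SelectionDAG.h"
#include <utility>
static std::pair<llvm::SDValue, llvm::SDValue>
splitVectorInHalf(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                  llvm::SDValue Vec) {
  // Low/high VTs may differ for oddly sized vectors.
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Vec.getValueType());
  return DAG.SplitVector(Vec, DL, LoVT, HiVT);
}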
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:878
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:490
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:789
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:692
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:784
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:484
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:815
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:861
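A minimal sketch of getSplatBuildVector; the helper name and the v4i32 result type are arbitrary choices for illustration:
#include "llvm/CodeGen/SelectionDAG.h"
// BUILD_VECTOR with Scalar repeated in every lane of the fixed-width type.
static llvm::SDValue splatToV4I32(llvm::SelectionDAG &DAG,
                                  const llvm::SDLoc &DL, llvm::SDValue Scalar) {
  return DAG.getSplatBuildVector(llvm::MVT::v4i32, DL, Scalar);
}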
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:502
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:755
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:572
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:894
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
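A small, hypothetical example of querying a shuffle mask with the static helpers above; the mask {3,2,1,0} reverses a 4-element vector drawn from one source:
#include "llvm/IR/Instructions.h"
static void shuffleMaskQueries() {
  int Reverse4[] = {3, 2, 1, 0};
  bool IsRev = llvm::ShuffleVectorInst::isReverseMask(Reverse4, /*NumSrcElts=*/4); // true
  bool IsId  = llvm::ShuffleVectorInst::isIdentityMask(Reverse4, /*NumSrcElts=*/4); // false
  (void)IsRev;
  (void)IsId;
}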
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
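A tiny illustration of SmallSet's insert/count contract (function name is hypothetical): insert reports via .second whether the element was newly added.
#include "llvm/ADT/SmallSet.h"
static bool smallSetDemo() {
  llvm::SmallSet<unsigned, 4> Seen;
  bool FirstTime = Seen.insert(42).second;   // true: newly inserted
  bool SecondTime = Seen.insert(42).second;  // false: already present
  return FirstTime && !SecondTime && Seen.count(42) == 1;
}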
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:951
void reserve(size_type N)
Definition: SmallVector.h:677
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:697
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:819
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
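A brief sketch of the SmallVector operations listed above (values and function name are illustrative only):
#include "llvm/ADT/SmallVector.h"
#include <iterator>
static llvm::SmallVector<int, 8> buildSmallVector() {
  llvm::SmallVector<int, 8> V;
  V.reserve(4);        // avoid reallocation while appending below
  V.push_back(1);
  V.emplace_back(2);
  int Tail[] = {3, 4};
  V.append(std::begin(Tail), std::end(Tail));
  return V;
}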
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
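A minimal StringSwitch usage sketch; the function, the ABI-name strings, and the integer results are illustrative, not code from this file:
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
static int xlenForAbiName(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .Case("ilp32", 32)
      .Cases("lp64", "lp64d", 64)
      .Default(-1);
}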
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:423
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:600
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
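A hypothetical subclass sketch showing the shape of the setOperationAction API; the class, the member function, and the chosen opcodes/types/actions are illustrative assumptions, not this target's actual configuration:
#include "llvm/CodeGen/TargetLowering.h"
namespace {
struct DemoTLI : llvm::TargetLowering {
  using llvm::TargetLowering::TargetLowering;
  void configureDemoActions() {
    setOperationAction(llvm::ISD::ADD, llvm::MVT::i64, Legal);     // natively supported
    setOperationAction(llvm::ISD::SDIV, llvm::MVT::i64, Expand);   // expand to a generic sequence
    setOperationAction(llvm::ISD::BR_JT, llvm::MVT::Other, Custom); // routed through LowerOperation
  }
};
} // namespace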
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op: only the DemandedBits bits and DemandedElts elements of its result are used downstream, so try to simplify it accordingly.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:245
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:372
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:826
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1169
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1165
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1382
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1330
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1415
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1312
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1198
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1314
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1315
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:820
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1400
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1404
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1271
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1276
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1414
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:943
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1310
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1311
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1455
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:915
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the llvm.readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1231
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1397
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1264
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1401
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1031
@ STRICT_LROUND
Definition: ISDOpcodes.h:445
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1120
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1313
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1099
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:600
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:660
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1416
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1194
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1409
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:910
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1308
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1254
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:886
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1372
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1291
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1316
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1008
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1084
@ STRICT_LRINT
Definition: ISDOpcodes.h:447
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:605
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:918
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1342
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1417
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:443
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:952
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1306
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1027
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1307
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1225
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1251
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:650
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:448
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:626
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1305
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ STRICT_LLROUND
Definition: ISDOpcodes.h:446
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:885
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1405
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1189
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1113
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:594
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
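A short sketch of the two condition-code helpers above (function name is hypothetical; the header choice is an assumption for convenience):
#include "llvm/CodeGen/SelectionDAG.h"
static void relatedCondCodes() {
  llvm::ISD::CondCode CC  = llvm::ISD::SETLT;
  llvm::ISD::CondCode Inv = llvm::ISD::getSetCCInverse(CC, llvm::MVT::i64); // SETGE
  llvm::ISD::CondCode Swp = llvm::ISD::getSetCCSwappedOperands(CC);         // SETGT
  (void)Inv;
  (void)Swp;
}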
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1540
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1540
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1527
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1461
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1578
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1623
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
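As a hedged illustration of the declaration helper above, the sketch below (the helper name is hypothetical) materialises llvm.cttz for a value's type and emits a call to it.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Illustrative helper: emit cttz(X, /*is_zero_poison=*/false).
static CallInst *emitCttz(IRBuilder<> &B, Value *X) {
  Module *M = B.GetInsertBlock()->getModule();
  Function *Decl =
      Intrinsic::getDeclaration(M, Intrinsic::cttz, {X->getType()});
  // The second operand tells cttz whether a zero input is poison.
  return B.CreateCall(Decl, {X, B.getFalse()});
}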
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
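The matchers above are typically combined to spot the canonical IR splat idiom. A minimal sketch follows; the helper name is illustrative and, for brevity, it deliberately ignores the shuffle mask (so a non-zero mask would also match).

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognise:
//   %ins   = insertelement <N x T> undef, T %s, i64 0
//   %splat = shufflevector %ins, undef, <mask>
// and return the splatted scalar, or nullptr.
static Value *matchSplatScalar(Value *V) {
  Value *Scalar = nullptr;
  if (match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()),
                         m_Undef())))
    return Scalar;
  return nullptr;
}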
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SW_GUARDED_BRIND
Software guarded BRIND node.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
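A hedged sketch of how the SEW helpers above relate, assuming the usual vtype encoding SEW = 8 << vsew; the include path (in-tree RISC-V MCTargetDesc header) and the helper name are assumptions of this sketch.

#include "MCTargetDesc/RISCVBaseInfo.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

// Round-trip a SEW value through the vtype encode/decode helpers.
static void checkSEWRoundTrip(unsigned SEW) {
  assert(isPowerOf2_32(SEW) && SEW >= 8 && SEW <= 64 && "invalid SEW");
  unsigned VSEW = RISCVVType::encodeSEW(SEW);  // e.g. 32 -> 2
  assert(RISCVVType::decodeVSEW(VSEW) == SEW); // 2 -> 32
}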
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
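The FPMASK_* constants above mirror the ten result bits of the RISC-V fclass instruction, so class tests reduce to mask intersections. A hedged fragment; the RISCV:: namespace qualification and header placement are assumptions of this sketch.

// A value is a NaN iff (fclass(x) & getNaNClassMask()) != 0.
static unsigned getNaNClassMask() {
  return RISCV::FPMASK_Signaling_NaN | RISCV::FPMASK_Quiet_NaN;
}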
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2406
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1535
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
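The bit/MathExtras utilities listed above are easiest to read through a few concrete values; the sketch below is illustrative self-checks only (the function name is hypothetical).

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static void bitUtilityExamples() {
  assert(isPowerOf2_64(64) && !isPowerOf2_64(0)); // powers of two > 0 only
  assert(Log2_64(64) == 6);                       // floor log base 2
  assert(PowerOf2Ceil(100) == 128);               // round up to a power of two
  assert(countr_zero(0b101000u) == 3);            // trailing zero count
  assert(bit_width(255u) == 8);                   // bits needed for the value
}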
bool isReleaseOrStronger(AtomicOrdering AO)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
CombineLevel
Definition: DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
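ComputeValueVTs is the standard way to split an IR type into the EVTs the target sees during call/return lowering. A minimal sketch (helper name is illustrative): a {i64, double} struct would yield two EVTs.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Count how many scalar values an IR type lowers to.
static unsigned countLoweredValues(const TargetLowering &TLI,
                                   const DataLayout &DL, Type *Ty) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);
  return ValueVTs.size();
}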
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
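The range helpers listed above (enumerate, any_of, count_if, is_contained) replace explicit begin/end loops; a small illustrative use on a shuffle mask (the function is hypothetical):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Returns true for an identity mask with no undef (-1) lanes.
static bool maskIsIdentityExample() {
  SmallVector<int, 4> Mask = {0, 1, 2, 3};
  if (any_of(Mask, [](int M) { return M < 0; })) // any undef lanes?
    return false;
  auto Mismatches = count_if(
      enumerate(Mask), [](auto P) { return (int)P.index() != P.value(); });
  return Mismatches == 0 && is_contained(Mask, 3);
}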
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
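A brief illustration of the mask builder above: three consecutive lanes starting at 2 followed by one undef lane.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// createSequentialMask(2, 3, 1) produces {2, 3, 4, -1}.
static SmallVector<int, 16> makeExtractMask() {
  return createSequentialMask(/*Start=*/2, /*NumInts=*/3, /*NumUndefs=*/1);
}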
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
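The constant predicates above are the usual guards in DAG combines; a hedged sketch (helper name and return encoding are illustrative) classifying the right-hand side of an AND:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static int classifyAndMask(SDValue Mask) {
  if (isAllOnesConstant(Mask))
    return 0; // x & ~0 is a no-op
  if (isNullConstant(Mask))
    return 1; // x & 0 folds to zero
  if (isOneConstant(Mask) || isOneOrOneSplat(Mask))
    return 2; // x & 1 keeps only the low bit
  return -1;
}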
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:329
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
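A short, hedged sketch tying the EVT queries above together: build <4 x i16> and derive a related type (function name is illustrative).

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

static void evtExamples(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::i16, 4);          // <4 x i16>
  assert(VT.isVector() && VT.getVectorNumElements() == 4);
  assert(VT.getScalarSizeInBits() == 16);
  assert(VT.getFixedSizeInBits() == 64);                // fixed-width vector
  EVT WideEltVT = VT.changeVectorElementType(MVT::i32); // <4 x i32>
  assert(WideEltVT.getFixedSizeInBits() == 128);
  (void)WideEltVT;
}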
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1042
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:263
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1002
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:269
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
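The KnownBits interface above is easiest to see on a concrete value. A hedged sketch (helper name is illustrative): an 8-bit value with the two low bits and the four high bits known zero.

#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

static void knownBitsExample() {
  KnownBits Known(8);
  Known.Zero.setBits(0, 2);                 // bits 0-1 known zero
  Known.Zero.setBits(4, 8);                 // bits 4-7 known zero
  assert(Known.countMaxTrailingZeros() >= 2);
  assert(Known.countMaxActiveBits() <= 4);
  KnownBits Wide = Known.zext(16);          // new top bits become known zero
  assert(Wide.getBitWidth() == 16);
  assert(Wide.countMaxLeadingZeros() >= 12);
  (void)Wide;
}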
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)