RISCVISelLowering.cpp
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
55static cl::opt<unsigned> ExtensionMaxWebSize(
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
67static cl::opt<unsigned> NumRepeatedDivisors(
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
74 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
80 const RISCVSubtarget &STI)
81 : TargetLowering(TM), Subtarget(STI) {
82
83 RISCVABI::ABI ABI = Subtarget.getTargetABI();
84 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
85
86 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
87 !Subtarget.hasStdExtF()) {
88 errs() << "Hard-float 'f' ABI can't be used for a target that "
89 "doesn't support the F instruction set extension (ignoring "
90 "target-abi)\n";
92 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
93 !Subtarget.hasStdExtD()) {
94 errs() << "Hard-float 'd' ABI can't be used for a target that "
95 "doesn't support the D instruction set extension (ignoring "
96 "target-abi)\n";
98 }
99
100 switch (ABI) {
101 default:
102 report_fatal_error("Don't know how to lower this ABI");
111 break;
112 }
113
114 MVT XLenVT = Subtarget.getXLenVT();
115
116 // Set up the register classes.
117 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
118
119 if (Subtarget.hasStdExtZfhmin())
120 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
121 if (Subtarget.hasStdExtZfbfmin())
122 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtF())
124 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
125 if (Subtarget.hasStdExtD())
126 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
127 if (Subtarget.hasStdExtZhinxmin())
128 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
129 if (Subtarget.hasStdExtZfinx())
130 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
131 if (Subtarget.hasStdExtZdinx()) {
132 if (Subtarget.is64Bit())
133 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
134 else
135 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
136 }
137
138 static const MVT::SimpleValueType BoolVecVTs[] = {
139 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
140 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
141 static const MVT::SimpleValueType IntVecVTs[] = {
142 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
143 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
144 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
145 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
146 MVT::nxv4i64, MVT::nxv8i64};
147 static const MVT::SimpleValueType F16VecVTs[] = {
148 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
149 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
150 static const MVT::SimpleValueType BF16VecVTs[] = {
151 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
152 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
153 static const MVT::SimpleValueType F32VecVTs[] = {
154 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
155 static const MVT::SimpleValueType F64VecVTs[] = {
156 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
157
158 if (Subtarget.hasVInstructions()) {
159 auto addRegClassForRVV = [this](MVT VT) {
160 // Disable the smallest fractional LMUL types if ELEN is less than
161 // RVVBitsPerBlock.
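 // For example (illustrative): with a Zve32-class ELEN of 32, MinElts is
 // 64 / 32 = 2, so nxv1i8 (a known minimum of one element) is rejected
 // here and never receives a register class.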
162 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
163 if (VT.getVectorMinNumElements() < MinElts)
164 return;
165
166 unsigned Size = VT.getSizeInBits().getKnownMinValue();
167 const TargetRegisterClass *RC;
169 RC = &RISCV::VRRegClass;
170 else if (Size == 2 * RISCV::RVVBitsPerBlock)
171 RC = &RISCV::VRM2RegClass;
172 else if (Size == 4 * RISCV::RVVBitsPerBlock)
173 RC = &RISCV::VRM4RegClass;
174 else if (Size == 8 * RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRM8RegClass;
176 else
177 llvm_unreachable("Unexpected size");
178
179 addRegisterClass(VT, RC);
180 };
181
182 for (MVT VT : BoolVecVTs)
183 addRegClassForRVV(VT);
184 for (MVT VT : IntVecVTs) {
185 if (VT.getVectorElementType() == MVT::i64 &&
186 !Subtarget.hasVInstructionsI64())
187 continue;
188 addRegClassForRVV(VT);
189 }
190
191 if (Subtarget.hasVInstructionsF16Minimal())
192 for (MVT VT : F16VecVTs)
193 addRegClassForRVV(VT);
194
195 if (Subtarget.hasVInstructionsBF16Minimal())
196 for (MVT VT : BF16VecVTs)
197 addRegClassForRVV(VT);
198
199 if (Subtarget.hasVInstructionsF32())
200 for (MVT VT : F32VecVTs)
201 addRegClassForRVV(VT);
202
203 if (Subtarget.hasVInstructionsF64())
204 for (MVT VT : F64VecVTs)
205 addRegClassForRVV(VT);
206
207 if (Subtarget.useRVVForFixedLengthVectors()) {
208 auto addRegClassForFixedVectors = [this](MVT VT) {
209 MVT ContainerVT = getContainerForFixedLengthVector(VT);
210 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
211 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
212 addRegisterClass(VT, TRI.getRegClass(RCID));
213 };
215 if (useRVVForFixedLengthVectorVT(VT))
216 addRegClassForFixedVectors(VT);
217
219 if (useRVVForFixedLengthVectorVT(VT))
220 addRegClassForFixedVectors(VT);
221 }
222 }
223
224 // Compute derived properties from the register classes.
226
228
230 MVT::i1, Promote);
231 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
233 MVT::i1, Promote);
234
235 // TODO: add all necessary setOperationAction calls.
237
242
247 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
250 }
251
253
256
258
260
261 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
262 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
263 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
264
265 if (Subtarget.is64Bit()) {
267
270 MVT::i32, Custom);
272 if (!Subtarget.hasStdExtZbb())
275 Custom);
277 }
278 if (!Subtarget.hasStdExtZmmul()) {
280 } else if (Subtarget.is64Bit()) {
283 } else {
285 }
286
287 if (!Subtarget.hasStdExtM()) {
289 Expand);
290 } else if (Subtarget.is64Bit()) {
292 {MVT::i8, MVT::i16, MVT::i32}, Custom);
293 }
294
297 Expand);
298
300 Custom);
301
302 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
303 if (Subtarget.is64Bit())
305 } else if (Subtarget.hasVendorXTHeadBb()) {
306 if (Subtarget.is64Bit())
309 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
311 } else {
313 }
314
315 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
316 // pattern match it directly in isel.
318 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
319 Subtarget.hasVendorXTHeadBb())
320 ? Legal
321 : Expand);
322
323 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
325 } else {
326 // Zbkb can use rev8+brev8 to implement bitreverse.
328 Subtarget.hasStdExtZbkb() ? Custom : Expand);
329 }
330
331 if (Subtarget.hasStdExtZbb() ||
332 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
334 Legal);
335 }
336
337 if (Subtarget.hasStdExtZbb() ||
338 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
339 if (Subtarget.is64Bit())
341 } else {
343 }
344
345 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
346 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
347 // We need the custom lowering to make sure that the resulting sequence
348 // for the 32-bit case is efficient on 64-bit targets.
349 if (Subtarget.is64Bit())
351 } else {
353 }
354
355 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
357 } else if (Subtarget.hasShortForwardBranchOpt()) {
358 // We can use PseudoCCSUB to implement ABS.
360 } else if (Subtarget.is64Bit()) {
362 }
363
364 if (!Subtarget.hasVendorXTHeadCondMov())
366
367 static const unsigned FPLegalNodeTypes[] = {
375
376 static const ISD::CondCode FPCCToExpand[] = {
380
381 static const unsigned FPOpToExpand[] = {
383 ISD::FREM};
384
385 static const unsigned FPRndMode[] = {
388
389 if (Subtarget.hasStdExtZfhminOrZhinxmin())
391
392 static const unsigned ZfhminZfbfminPromoteOps[] = {
407
408 if (Subtarget.hasStdExtZfbfmin()) {
417 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
419 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
420 // DAGCombiner::visitFP_ROUND probably needs improvements first.
422 }
423
424 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
425 if (Subtarget.hasStdExtZfhOrZhinx()) {
426 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
427 setOperationAction(FPRndMode, MVT::f16,
428 Subtarget.hasStdExtZfa() ? Legal : Custom);
431 } else {
432 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
435 MVT::f16, Legal);
436 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
437 // DAGCombiner::visitFP_ROUND probably needs improvements first.
439 }
440
443 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
446
448 Subtarget.hasStdExtZfa() ? Legal : Promote);
453 MVT::f16, Promote);
454
455 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
456 // complete support for all operations in LegalizeDAG.
461 MVT::f16, Promote);
462
463 // We need to custom promote this.
464 if (Subtarget.is64Bit())
466
468 Subtarget.hasStdExtZfa() ? Legal : Custom);
469 }
470
471 if (Subtarget.hasStdExtFOrZfinx()) {
472 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
473 setOperationAction(FPRndMode, MVT::f32,
474 Subtarget.hasStdExtZfa() ? Legal : Custom);
475 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
479 setOperationAction(FPOpToExpand, MVT::f32, Expand);
480 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
481 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
482 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
483 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
487 Subtarget.isSoftFPABI() ? LibCall : Custom);
490
491 if (Subtarget.hasStdExtZfa()) {
494 } else {
496 }
497 }
498
499 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
501
502 if (Subtarget.hasStdExtDOrZdinx()) {
503 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
504
505 if (!Subtarget.is64Bit())
507
508 if (Subtarget.hasStdExtZfa()) {
509 setOperationAction(FPRndMode, MVT::f64, Legal);
512 } else {
513 if (Subtarget.is64Bit())
514 setOperationAction(FPRndMode, MVT::f64, Custom);
515
517 }
518
521 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
525 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
526 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
527 setOperationAction(FPOpToExpand, MVT::f64, Expand);
528 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
529 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
530 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
531 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
535 Subtarget.isSoftFPABI() ? LibCall : Custom);
538 }
539
540 if (Subtarget.is64Bit()) {
543 MVT::i32, Custom);
545 }
546
547 if (Subtarget.hasStdExtFOrZfinx()) {
549 Custom);
550
553 XLenVT, Legal);
554
557 }
558
561 XLenVT, Custom);
562
564
565 if (Subtarget.is64Bit())
567
568 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
569 // Unfortunately this can't be determined just from the ISA naming string.
571 Subtarget.is64Bit() ? Legal : Custom);
573 Subtarget.is64Bit() ? Legal : Custom);
574
577 if (Subtarget.is64Bit())
579
580 if (Subtarget.hasStdExtZicbop()) {
582 }
583
584 if (Subtarget.hasStdExtA()) {
586 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
588 else
590 } else if (Subtarget.hasForcedAtomics()) {
592 } else {
594 }
595
597
599
600 if (getTargetMachine().getTargetTriple().isOSLinux()) {
601 // Custom lowering of llvm.clear_cache.
603 }
604
605 if (Subtarget.hasVInstructions()) {
607
609
610 // RVV intrinsics may have illegal operands.
611 // We also need to custom legalize vmv.x.s.
614 {MVT::i8, MVT::i16}, Custom);
615 if (Subtarget.is64Bit())
617 MVT::i32, Custom);
618 else
620 MVT::i64, Custom);
621
623 MVT::Other, Custom);
624
625 static const unsigned IntegerVPOps[] = {
626 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
627 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
628 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
629 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
630 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
631 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
632 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
633 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
634 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
635 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
636 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
637 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
638 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
639 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
640 ISD::EXPERIMENTAL_VP_SPLAT};
641
642 static const unsigned FloatingPointVPOps[] = {
643 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
644 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
645 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
646 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
647 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
648 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
649 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
650 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
651 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
652 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
653 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
654 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
655 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
656 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
657
658 static const unsigned IntegerVecReduceOps[] = {
662
663 static const unsigned FloatingPointVecReduceOps[] = {
666
667 if (!Subtarget.is64Bit()) {
668 // We must custom-lower certain vXi64 operations on RV32 due to the vector
669 // element type being illegal.
671 MVT::i64, Custom);
672
673 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
674
675 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
676 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
677 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
678 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
679 MVT::i64, Custom);
680 }
681
682 for (MVT VT : BoolVecVTs) {
683 if (!isTypeLegal(VT))
684 continue;
685
687
688 // Mask VTs are custom-expanded into a series of standard nodes
692 VT, Custom);
693
695 Custom);
696
699 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
700 Expand);
701
702 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
703 Custom);
704
705 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
706
709 Custom);
710
712 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
713 Custom);
714
715 // RVV has native int->float & float->int conversions where the
716 // element type sizes are within one power-of-two of each other. Any
717 // wider distances between type sizes have to be lowered as sequences
718 // which progressively narrow the gap in stages.
723 VT, Custom);
725 Custom);
726
727 // Expand all extending loads to types larger than this, and truncating
728 // stores from types larger than this.
730 setTruncStoreAction(VT, OtherVT, Expand);
732 OtherVT, Expand);
733 }
734
735 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
736 ISD::VP_TRUNCATE, ISD::VP_SETCC},
737 VT, Custom);
738
741
743
744 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
745 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
746
749 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
750 }
751
752 for (MVT VT : IntVecVTs) {
753 if (!isTypeLegal(VT))
754 continue;
755
758
759 // Vectors implement MULHS/MULHU.
761
762 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
763 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
765
767 Legal);
768
770
771 // Custom-lower extensions and truncations from/to mask types.
773 VT, Custom);
774
775 // RVV has native int->float & float->int conversions where the
776 // element type sizes are within one power-of-two of each other. Any
777 // wider distances between type sizes have to be lowered as sequences
778 // which progressively narrow the gap in stages.
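 // For example (illustrative): an nxv2i8 -> nxv2f64 conversion cannot be
 // selected as a single conversion instruction; the custom lowering closes
 // the width gap in steps (widening the integer source first, then
 // converting) so that each emitted conversion stays within one
 // power-of-two of element size.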
783 VT, Custom);
785 Custom);
789 VT, Legal);
790
791 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
792 // nodes which truncate by one power of two at a time.
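 // For example (illustrative): truncating nxv2i64 to nxv2i8 is emitted as a
 // chain of three TRUNCATE_VECTOR_VL nodes (i64 -> i32 -> i16 -> i8), each
 // of which is typically selected as a narrowing shift (vnsrl).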
795 Custom);
796
797 // Custom-lower insert/extract operations to simplify patterns.
799 Custom);
800
801 // Custom-lower reduction operations to set up the corresponding custom
802 // nodes' operands.
803 setOperationAction(IntegerVecReduceOps, VT, Custom);
804
805 setOperationAction(IntegerVPOps, VT, Custom);
806
808
810 VT, Custom);
811
813 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
814 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
815 VT, Custom);
816
819 VT, Custom);
820
823
825
827 setTruncStoreAction(VT, OtherVT, Expand);
829 OtherVT, Expand);
830 }
831
834
835 // Splice
837
838 if (Subtarget.hasStdExtZvkb()) {
840 setOperationAction(ISD::VP_BSWAP, VT, Custom);
841 } else {
842 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
844 }
845
846 if (Subtarget.hasStdExtZvbb()) {
848 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
849 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
850 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
851 VT, Custom);
852 } else {
853 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
855 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
856 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
857 VT, Expand);
858
859 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
860 // range of f32.
861 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
862 if (isTypeLegal(FloatVT)) {
864 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
865 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
866 VT, Custom);
867 }
868 }
869 }
870
871 // Expand various CCs to best match the RVV ISA, which natively supports UNE
872 // but no other unordered comparisons, and supports all ordered comparisons
873 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
874 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
875 // and we pattern-match those back to the "original", swapping operands once
876 // more. This way we catch both operations and both "vf" and "fv" forms with
877 // fewer patterns.
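 // For example (illustrative): a SETOGT compare is expanded here to SETOLT
 // with its operands swapped; isel later recognizes the swapped form and
 // swaps once more, so a single set of patterns covers both the "vf" and
 // "fv" operand orders of the greater-than compare.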
878 static const ISD::CondCode VFPCCToExpand[] = {
882 };
883
884 // TODO: support more ops.
885 static const unsigned ZvfhminPromoteOps[] = {
893
894 // TODO: support more vp ops.
895 static const unsigned ZvfhminPromoteVPOps[] = {
896 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
897 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
898 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
899 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
900 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
901 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
902 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
903 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
904 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
905
906 // Sets common operation actions on RVV floating-point vector types.
907 const auto SetCommonVFPActions = [&](MVT VT) {
909 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
910 // sizes are within one power-of-two of each other. Therefore conversions
911 // between vXf16 and vXf64 must be lowered as sequences which convert via
912 // vXf32.
915 // Custom-lower insert/extract operations to simplify patterns.
917 Custom);
918 // Expand various condition codes (explained above).
919 setCondCodeAction(VFPCCToExpand, VT, Expand);
920
923
927 VT, Custom);
928
929 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
930
931 // Expand FP operations that need libcalls.
943
945
947
949 VT, Custom);
950
952 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
953 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
954 VT, Custom);
955
958
961 VT, Custom);
962
965
967
968 setOperationAction(FloatingPointVPOps, VT, Custom);
969
971 Custom);
974 VT, Legal);
979 VT, Custom);
980 };
981
982 // Sets common extload/truncstore actions on RVV floating-point vector
983 // types.
984 const auto SetCommonVFPExtLoadTruncStoreActions =
985 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
986 for (auto SmallVT : SmallerVTs) {
987 setTruncStoreAction(VT, SmallVT, Expand);
988 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
989 }
990 };
991
992 if (Subtarget.hasVInstructionsF16()) {
993 for (MVT VT : F16VecVTs) {
994 if (!isTypeLegal(VT))
995 continue;
996 SetCommonVFPActions(VT);
997 }
998 } else if (Subtarget.hasVInstructionsF16Minimal()) {
999 for (MVT VT : F16VecVTs) {
1000 if (!isTypeLegal(VT))
1001 continue;
1004 Custom);
1005 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1006 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1007 Custom);
1010 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1011 VT, Custom);
1014 VT, Custom);
1015 if (Subtarget.hasStdExtZfhmin())
1017 // load/store
1019
1020 // Custom split nxv32f16 since nxv32f32 is not legal.
1021 if (VT == MVT::nxv32f16) {
1022 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1023 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1024 continue;
1025 }
1026 // Add more promote ops.
1027 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1028 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1029 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1030 }
1031 }
1032
1033 // TODO: Could we merge some code with zvfhmin?
1034 if (Subtarget.hasVInstructionsBF16Minimal()) {
1035 for (MVT VT : BF16VecVTs) {
1036 if (!isTypeLegal(VT))
1037 continue;
1039 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1041 Custom);
1044 VT, Custom);
1046 if (Subtarget.hasStdExtZfbfmin())
1048 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1049 Custom);
1051 // TODO: Promote to fp32.
1052 }
1053 }
1054
1055 if (Subtarget.hasVInstructionsF32()) {
1056 for (MVT VT : F32VecVTs) {
1057 if (!isTypeLegal(VT))
1058 continue;
1059 SetCommonVFPActions(VT);
1060 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1061 }
1062 }
1063
1064 if (Subtarget.hasVInstructionsF64()) {
1065 for (MVT VT : F64VecVTs) {
1066 if (!isTypeLegal(VT))
1067 continue;
1068 SetCommonVFPActions(VT);
1069 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1070 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1071 }
1072 }
1073
1074 if (Subtarget.useRVVForFixedLengthVectors()) {
1076 if (!useRVVForFixedLengthVectorVT(VT))
1077 continue;
1078
1079 // By default everything must be expanded.
1080 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1083 setTruncStoreAction(VT, OtherVT, Expand);
1085 OtherVT, Expand);
1086 }
1087
1088 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1089 // expansion to a build_vector of 0s.
1091
1092 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1094 Custom);
1095
1098 Custom);
1099
1101 VT, Custom);
1102
1104
1106
1108
1110
1113 Custom);
1114
1116
1119 Custom);
1120
1122 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1123 Custom);
1124
1126 {
1135 },
1136 VT, Custom);
1138 Custom);
1139
1141
1142 // Operations below differ between mask vectors and other vectors.
1143 if (VT.getVectorElementType() == MVT::i1) {
1144 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1145 ISD::OR, ISD::XOR},
1146 VT, Custom);
1147
1148 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1149 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1150 VT, Custom);
1151
1152 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1153 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1154 continue;
1155 }
1156
1157 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1158 // it before type legalization for i64 vectors on RV32. It will then be
1159 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1160 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1161 // improvements first.
1162 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1165 }
1166
1169
1170 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1171 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1172 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1173 ISD::VP_SCATTER},
1174 VT, Custom);
1175
1179 VT, Custom);
1180
1183
1185
1186 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1187 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1189
1193 VT, Custom);
1194
1196
1199
1200 // Custom-lower reduction operations to set up the corresponding custom
1201 // nodes' operands.
1205 VT, Custom);
1206
1207 setOperationAction(IntegerVPOps, VT, Custom);
1208
1209 if (Subtarget.hasStdExtZvkb())
1211
1212 if (Subtarget.hasStdExtZvbb()) {
1215 VT, Custom);
1216 } else {
1217 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1218 // range of f32.
1219 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1220 if (isTypeLegal(FloatVT))
1223 Custom);
1224 }
1225 }
1226
1228 // There are no extending loads or truncating stores.
1229 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1230 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1231 setTruncStoreAction(VT, InnerVT, Expand);
1232 }
1233
1234 if (!useRVVForFixedLengthVectorVT(VT))
1235 continue;
1236
1237 // By default everything must be expanded.
1238 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1240
1241 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1242 // expansion to a build_vector of 0s.
1244
1247 VT, Custom);
1248
1249 // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
1250 // vp_stride_load/store, vp_gather/scatter can be hoisted to here.
1252
1255 Custom);
1256
1257 if (VT.getVectorElementType() == MVT::f16 &&
1258 !Subtarget.hasVInstructionsF16()) {
1259 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1261 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1262 Custom);
1264 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1265 VT, Custom);
1267 if (Subtarget.hasStdExtZfhmin()) {
1268 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1270 } else {
1271 // We need to custom legalize f16 build vectors if Zfhmin isn't
1272 // available.
1274 }
1275 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1276 // Don't promote f16 vector operations to f32 if f32 vector type is
1277 // not legal.
1278 // TODO: could split the f16 vector into two vectors and do promotion.
1279 if (!isTypeLegal(F32VecVT))
1280 continue;
1281 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1282 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1283 continue;
1284 }
1285
1286 if (VT.getVectorElementType() == MVT::bf16) {
1287 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1288 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1291 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1292 Custom);
1293 // TODO: Promote to fp32.
1294 continue;
1295 }
1296
1299 VT, Custom);
1300
1303
1304 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1305 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1306 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1307 ISD::VP_SCATTER},
1308 VT, Custom);
1309
1314 VT, Custom);
1315
1318 VT, Custom);
1319
1320 setCondCodeAction(VFPCCToExpand, VT, Expand);
1321
1324
1326
1327 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1328
1329 setOperationAction(FloatingPointVPOps, VT, Custom);
1330
1337 VT, Custom);
1338 }
1339
1340 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1341 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1342 Custom);
1343 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1345 if (Subtarget.hasStdExtFOrZfinx())
1347 if (Subtarget.hasStdExtDOrZdinx())
1349 }
1350 }
1351
1352 if (Subtarget.hasStdExtA())
1354
1355 if (Subtarget.hasForcedAtomics()) {
1356 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1362 XLenVT, LibCall);
1363 }
1364
1365 if (Subtarget.hasVendorXTHeadMemIdx()) {
1366 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1367 setIndexedLoadAction(im, MVT::i8, Legal);
1368 setIndexedStoreAction(im, MVT::i8, Legal);
1369 setIndexedLoadAction(im, MVT::i16, Legal);
1370 setIndexedStoreAction(im, MVT::i16, Legal);
1371 setIndexedLoadAction(im, MVT::i32, Legal);
1372 setIndexedStoreAction(im, MVT::i32, Legal);
1373
1374 if (Subtarget.is64Bit()) {
1375 setIndexedLoadAction(im, MVT::i64, Legal);
1376 setIndexedStoreAction(im, MVT::i64, Legal);
1377 }
1378 }
1379 }
1380
1381 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1385
1389 }
1390
1391 // Function alignments.
1392 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1393 setMinFunctionAlignment(FunctionAlignment);
1394 // Set preferred alignments.
1397
1402
1403 if (Subtarget.hasStdExtFOrZfinx())
1405
1406 if (Subtarget.hasStdExtZbb())
1408
1409 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1410 Subtarget.hasVInstructions())
1412
1413 if (Subtarget.hasStdExtZbkb())
1415 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1417 if (Subtarget.hasStdExtFOrZfinx())
1420 if (Subtarget.hasVInstructions())
1422 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1425 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1428 if (Subtarget.hasVendorXTHeadMemPair())
1430 if (Subtarget.useRVVForFixedLengthVectors())
1432
1433 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1434 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1435
1436 // Disable strict node mutation.
1437 IsStrictFPEnabled = true;
1438 EnableExtLdPromotion = true;
1439
1440 // Let the subtarget decide if a predictable select is more expensive than the
1441 // corresponding branch. This information is used in CGP/SelectOpt to decide
1442 // when to convert selects into branches.
1443 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1444}
1445
1447 LLVMContext &Context,
1448 EVT VT) const {
1449 if (!VT.isVector())
1450 return getPointerTy(DL);
1451 if (Subtarget.hasVInstructions() &&
1452 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1453 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1455}
1456
1457MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1458 return Subtarget.getXLenVT();
1459}
1460
1461// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1462bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1463 unsigned VF,
1464 bool IsScalable) const {
1465 if (!Subtarget.hasVInstructions())
1466 return true;
1467
1468 if (!IsScalable)
1469 return true;
1470
1471 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1472 return true;
1473
1474 // Don't allow VF=1 if those types aren't legal.
1475 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1476 return true;
1477
1478 // VLEN=32 support is incomplete.
1479 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1480 return true;
1481
1482 // The maximum VF is for the smallest element width with LMUL=8.
1483 // VF must be a power of 2.
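 // For example (illustrative): with RISCV::RVVBitsPerBlock == 64 this gives
 // MaxVF = (64 / 8) * 8 = 64, i.e. 8-bit elements at LMUL=8 for the minimum
 // vector register size.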
1484 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1485 return VF > MaxVF || !isPowerOf2_32(VF);
1486}
1487
1489 return !Subtarget.hasVInstructions() ||
1490 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1491}
1492
1494 const CallInst &I,
1495 MachineFunction &MF,
1496 unsigned Intrinsic) const {
1497 auto &DL = I.getDataLayout();
1498
1499 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1500 bool IsUnitStrided, bool UsePtrVal = false) {
1502 // We can't use ptrVal if the intrinsic can access memory before the
1503 // pointer. This means we can't use it for strided or indexed intrinsics.
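 // For example (illustrative): a strided load with a negative stride reads
 // memory below the base pointer, so recording ptrVal there would describe
 // too small a memory location; such intrinsics instead record only the
 // address space below.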
1504 if (UsePtrVal)
1505 Info.ptrVal = I.getArgOperand(PtrOp);
1506 else
1507 Info.fallbackAddressSpace =
1508 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1509 Type *MemTy;
1510 if (IsStore) {
1511 // Store value is the first operand.
1512 MemTy = I.getArgOperand(0)->getType();
1513 } else {
1514 // Use return type. If it's segment load, return type is a struct.
1515 MemTy = I.getType();
1516 if (MemTy->isStructTy())
1517 MemTy = MemTy->getStructElementType(0);
1518 }
1519 if (!IsUnitStrided)
1520 MemTy = MemTy->getScalarType();
1521
1522 Info.memVT = getValueType(DL, MemTy);
1523 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1525 Info.flags |=
1527 return true;
1528 };
1529
1530 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1532
1534 switch (Intrinsic) {
1535 default:
1536 return false;
1537 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1538 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1539 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1540 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1541 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1542 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1543 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1545 case Intrinsic::riscv_masked_cmpxchg_i32:
1547 Info.memVT = MVT::i32;
1548 Info.ptrVal = I.getArgOperand(0);
1549 Info.offset = 0;
1550 Info.align = Align(4);
1553 return true;
1554 case Intrinsic::riscv_seg2_load:
1555 case Intrinsic::riscv_seg3_load:
1556 case Intrinsic::riscv_seg4_load:
1557 case Intrinsic::riscv_seg5_load:
1558 case Intrinsic::riscv_seg6_load:
1559 case Intrinsic::riscv_seg7_load:
1560 case Intrinsic::riscv_seg8_load:
1561 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1562 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1563 case Intrinsic::riscv_seg2_store:
1564 case Intrinsic::riscv_seg3_store:
1565 case Intrinsic::riscv_seg4_store:
1566 case Intrinsic::riscv_seg5_store:
1567 case Intrinsic::riscv_seg6_store:
1568 case Intrinsic::riscv_seg7_store:
1569 case Intrinsic::riscv_seg8_store:
1570 // Operands are (vec, ..., vec, ptr, vl)
1571 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1572 /*IsStore*/ true,
1573 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1574 case Intrinsic::riscv_vle:
1575 case Intrinsic::riscv_vle_mask:
1576 case Intrinsic::riscv_vleff:
1577 case Intrinsic::riscv_vleff_mask:
1578 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1579 /*IsStore*/ false,
1580 /*IsUnitStrided*/ true,
1581 /*UsePtrVal*/ true);
1582 case Intrinsic::riscv_vse:
1583 case Intrinsic::riscv_vse_mask:
1584 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1585 /*IsStore*/ true,
1586 /*IsUnitStrided*/ true,
1587 /*UsePtrVal*/ true);
1588 case Intrinsic::riscv_vlse:
1589 case Intrinsic::riscv_vlse_mask:
1590 case Intrinsic::riscv_vloxei:
1591 case Intrinsic::riscv_vloxei_mask:
1592 case Intrinsic::riscv_vluxei:
1593 case Intrinsic::riscv_vluxei_mask:
1594 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1595 /*IsStore*/ false,
1596 /*IsUnitStrided*/ false);
1597 case Intrinsic::riscv_vsse:
1598 case Intrinsic::riscv_vsse_mask:
1599 case Intrinsic::riscv_vsoxei:
1600 case Intrinsic::riscv_vsoxei_mask:
1601 case Intrinsic::riscv_vsuxei:
1602 case Intrinsic::riscv_vsuxei_mask:
1603 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1604 /*IsStore*/ true,
1605 /*IsUnitStrided*/ false);
1606 case Intrinsic::riscv_vlseg2:
1607 case Intrinsic::riscv_vlseg3:
1608 case Intrinsic::riscv_vlseg4:
1609 case Intrinsic::riscv_vlseg5:
1610 case Intrinsic::riscv_vlseg6:
1611 case Intrinsic::riscv_vlseg7:
1612 case Intrinsic::riscv_vlseg8:
1613 case Intrinsic::riscv_vlseg2ff:
1614 case Intrinsic::riscv_vlseg3ff:
1615 case Intrinsic::riscv_vlseg4ff:
1616 case Intrinsic::riscv_vlseg5ff:
1617 case Intrinsic::riscv_vlseg6ff:
1618 case Intrinsic::riscv_vlseg7ff:
1619 case Intrinsic::riscv_vlseg8ff:
1620 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1621 /*IsStore*/ false,
1622 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1623 case Intrinsic::riscv_vlseg2_mask:
1624 case Intrinsic::riscv_vlseg3_mask:
1625 case Intrinsic::riscv_vlseg4_mask:
1626 case Intrinsic::riscv_vlseg5_mask:
1627 case Intrinsic::riscv_vlseg6_mask:
1628 case Intrinsic::riscv_vlseg7_mask:
1629 case Intrinsic::riscv_vlseg8_mask:
1630 case Intrinsic::riscv_vlseg2ff_mask:
1631 case Intrinsic::riscv_vlseg3ff_mask:
1632 case Intrinsic::riscv_vlseg4ff_mask:
1633 case Intrinsic::riscv_vlseg5ff_mask:
1634 case Intrinsic::riscv_vlseg6ff_mask:
1635 case Intrinsic::riscv_vlseg7ff_mask:
1636 case Intrinsic::riscv_vlseg8ff_mask:
1637 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1638 /*IsStore*/ false,
1639 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1640 case Intrinsic::riscv_vlsseg2:
1641 case Intrinsic::riscv_vlsseg3:
1642 case Intrinsic::riscv_vlsseg4:
1643 case Intrinsic::riscv_vlsseg5:
1644 case Intrinsic::riscv_vlsseg6:
1645 case Intrinsic::riscv_vlsseg7:
1646 case Intrinsic::riscv_vlsseg8:
1647 case Intrinsic::riscv_vloxseg2:
1648 case Intrinsic::riscv_vloxseg3:
1649 case Intrinsic::riscv_vloxseg4:
1650 case Intrinsic::riscv_vloxseg5:
1651 case Intrinsic::riscv_vloxseg6:
1652 case Intrinsic::riscv_vloxseg7:
1653 case Intrinsic::riscv_vloxseg8:
1654 case Intrinsic::riscv_vluxseg2:
1655 case Intrinsic::riscv_vluxseg3:
1656 case Intrinsic::riscv_vluxseg4:
1657 case Intrinsic::riscv_vluxseg5:
1658 case Intrinsic::riscv_vluxseg6:
1659 case Intrinsic::riscv_vluxseg7:
1660 case Intrinsic::riscv_vluxseg8:
1661 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1662 /*IsStore*/ false,
1663 /*IsUnitStrided*/ false);
1664 case Intrinsic::riscv_vlsseg2_mask:
1665 case Intrinsic::riscv_vlsseg3_mask:
1666 case Intrinsic::riscv_vlsseg4_mask:
1667 case Intrinsic::riscv_vlsseg5_mask:
1668 case Intrinsic::riscv_vlsseg6_mask:
1669 case Intrinsic::riscv_vlsseg7_mask:
1670 case Intrinsic::riscv_vlsseg8_mask:
1671 case Intrinsic::riscv_vloxseg2_mask:
1672 case Intrinsic::riscv_vloxseg3_mask:
1673 case Intrinsic::riscv_vloxseg4_mask:
1674 case Intrinsic::riscv_vloxseg5_mask:
1675 case Intrinsic::riscv_vloxseg6_mask:
1676 case Intrinsic::riscv_vloxseg7_mask:
1677 case Intrinsic::riscv_vloxseg8_mask:
1678 case Intrinsic::riscv_vluxseg2_mask:
1679 case Intrinsic::riscv_vluxseg3_mask:
1680 case Intrinsic::riscv_vluxseg4_mask:
1681 case Intrinsic::riscv_vluxseg5_mask:
1682 case Intrinsic::riscv_vluxseg6_mask:
1683 case Intrinsic::riscv_vluxseg7_mask:
1684 case Intrinsic::riscv_vluxseg8_mask:
1685 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1686 /*IsStore*/ false,
1687 /*IsUnitStrided*/ false);
1688 case Intrinsic::riscv_vsseg2:
1689 case Intrinsic::riscv_vsseg3:
1690 case Intrinsic::riscv_vsseg4:
1691 case Intrinsic::riscv_vsseg5:
1692 case Intrinsic::riscv_vsseg6:
1693 case Intrinsic::riscv_vsseg7:
1694 case Intrinsic::riscv_vsseg8:
1695 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1696 /*IsStore*/ true,
1697 /*IsUnitStrided*/ false);
1698 case Intrinsic::riscv_vsseg2_mask:
1699 case Intrinsic::riscv_vsseg3_mask:
1700 case Intrinsic::riscv_vsseg4_mask:
1701 case Intrinsic::riscv_vsseg5_mask:
1702 case Intrinsic::riscv_vsseg6_mask:
1703 case Intrinsic::riscv_vsseg7_mask:
1704 case Intrinsic::riscv_vsseg8_mask:
1705 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1706 /*IsStore*/ true,
1707 /*IsUnitStrided*/ false);
1708 case Intrinsic::riscv_vssseg2:
1709 case Intrinsic::riscv_vssseg3:
1710 case Intrinsic::riscv_vssseg4:
1711 case Intrinsic::riscv_vssseg5:
1712 case Intrinsic::riscv_vssseg6:
1713 case Intrinsic::riscv_vssseg7:
1714 case Intrinsic::riscv_vssseg8:
1715 case Intrinsic::riscv_vsoxseg2:
1716 case Intrinsic::riscv_vsoxseg3:
1717 case Intrinsic::riscv_vsoxseg4:
1718 case Intrinsic::riscv_vsoxseg5:
1719 case Intrinsic::riscv_vsoxseg6:
1720 case Intrinsic::riscv_vsoxseg7:
1721 case Intrinsic::riscv_vsoxseg8:
1722 case Intrinsic::riscv_vsuxseg2:
1723 case Intrinsic::riscv_vsuxseg3:
1724 case Intrinsic::riscv_vsuxseg4:
1725 case Intrinsic::riscv_vsuxseg5:
1726 case Intrinsic::riscv_vsuxseg6:
1727 case Intrinsic::riscv_vsuxseg7:
1728 case Intrinsic::riscv_vsuxseg8:
1729 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1730 /*IsStore*/ true,
1731 /*IsUnitStrided*/ false);
1732 case Intrinsic::riscv_vssseg2_mask:
1733 case Intrinsic::riscv_vssseg3_mask:
1734 case Intrinsic::riscv_vssseg4_mask:
1735 case Intrinsic::riscv_vssseg5_mask:
1736 case Intrinsic::riscv_vssseg6_mask:
1737 case Intrinsic::riscv_vssseg7_mask:
1738 case Intrinsic::riscv_vssseg8_mask:
1739 case Intrinsic::riscv_vsoxseg2_mask:
1740 case Intrinsic::riscv_vsoxseg3_mask:
1741 case Intrinsic::riscv_vsoxseg4_mask:
1742 case Intrinsic::riscv_vsoxseg5_mask:
1743 case Intrinsic::riscv_vsoxseg6_mask:
1744 case Intrinsic::riscv_vsoxseg7_mask:
1745 case Intrinsic::riscv_vsoxseg8_mask:
1746 case Intrinsic::riscv_vsuxseg2_mask:
1747 case Intrinsic::riscv_vsuxseg3_mask:
1748 case Intrinsic::riscv_vsuxseg4_mask:
1749 case Intrinsic::riscv_vsuxseg5_mask:
1750 case Intrinsic::riscv_vsuxseg6_mask:
1751 case Intrinsic::riscv_vsuxseg7_mask:
1752 case Intrinsic::riscv_vsuxseg8_mask:
1753 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1754 /*IsStore*/ true,
1755 /*IsUnitStrided*/ false);
1756 }
1757}
1758
1760 const AddrMode &AM, Type *Ty,
1761 unsigned AS,
1762 Instruction *I) const {
1763 // No global is ever allowed as a base.
1764 if (AM.BaseGV)
1765 return false;
1766
1767 // None of our addressing modes allows a scalable offset
1768 if (AM.ScalableOffset)
1769 return false;
1770
1771 // RVV instructions only support register addressing.
1772 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1773 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1774
1775 // Require a 12-bit signed offset.
1776 if (!isInt<12>(AM.BaseOffs))
1777 return false;
1778
1779 switch (AM.Scale) {
1780 case 0: // "r+i" or just "i", depending on HasBaseReg.
1781 break;
1782 case 1:
1783 if (!AM.HasBaseReg) // allow "r+i".
1784 break;
1785 return false; // disallow "r+r" or "r+r+i".
1786 default:
1787 return false;
1788 }
1789
1790 return true;
1791}
1792
1794 return isInt<12>(Imm);
1795}
1796
1798 return isInt<12>(Imm);
1799}
1800
1801// On RV32, 64-bit integers are split into their high and low parts and held
1802// in two different registers, so the trunc is free since the low register can
1803// just be used.
1804// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1805// isTruncateFree?
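// For example (illustrative): on RV32 an i64 value occupies two GPRs holding
// its low and high halves; truncating it to i32 simply keeps using the
// register that holds the low half, so no instruction is needed.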
1807 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1808 return false;
1809 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1810 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1811 return (SrcBits == 64 && DestBits == 32);
1812}
1813
1815 // We consider i64->i32 free on RV64 since we have good selection of W
1816 // instructions that make promoting operations back to i64 free in many cases.
1817 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1818 !DstVT.isInteger())
1819 return false;
1820 unsigned SrcBits = SrcVT.getSizeInBits();
1821 unsigned DestBits = DstVT.getSizeInBits();
1822 return (SrcBits == 64 && DestBits == 32);
1823}
1824
1826 EVT SrcVT = Val.getValueType();
1827 // free truncate from vnsrl and vnsra
1828 if (Subtarget.hasVInstructions() &&
1829 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1830 SrcVT.isVector() && VT2.isVector()) {
1831 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1832 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1833 if (SrcBits == DestBits * 2) {
1834 return true;
1835 }
1836 }
1837 return TargetLowering::isTruncateFree(Val, VT2);
1838}
1839
1841 // Zexts are free if they can be combined with a load.
1842 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1843 // poorly with type legalization of compares preferring sext.
1844 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1845 EVT MemVT = LD->getMemoryVT();
1846 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1847 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1848 LD->getExtensionType() == ISD::ZEXTLOAD))
1849 return true;
1850 }
1851
1852 return TargetLowering::isZExtFree(Val, VT2);
1853}
1854
1856 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1857}
1858
1860 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1861}
1862
1864 return Subtarget.hasStdExtZbb() ||
1865 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1866}
1867
1869 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1870 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1871}
1872
1874 const Instruction &AndI) const {
1875 // We expect to be able to match a bit extraction instruction if the Zbs
1876 // extension is supported and the mask is a power of two. However, we
1877 // conservatively return false if the mask would fit in an ANDI instruction,
1878 // on the basis that it's possible the sinking+duplication of the AND in
1879 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1880 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1881 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1882 return false;
1883 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1884 if (!Mask)
1885 return false;
1886 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1887}
1888
1890 EVT VT = Y.getValueType();
1891
1892 // FIXME: Support vectors once we have tests.
1893 if (VT.isVector())
1894 return false;
1895
1896 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1897 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
1898}
1899
1901 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1902 if (Subtarget.hasStdExtZbs())
1903 return X.getValueType().isScalarInteger();
1904 auto *C = dyn_cast<ConstantSDNode>(Y);
1905 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1906 if (Subtarget.hasVendorXTHeadBs())
1907 return C != nullptr;
1908 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1909 return C && C->getAPIntValue().ule(10);
1910}
1911
1913 EVT VT) const {
1914 // Only enable for rvv.
1915 if (!VT.isVector() || !Subtarget.hasVInstructions())
1916 return false;
1917
1918 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1919 return false;
1920
1921 return true;
1922}
1923
1925 Type *Ty) const {
1926 assert(Ty->isIntegerTy());
1927
1928 unsigned BitSize = Ty->getIntegerBitWidth();
1929 if (BitSize > Subtarget.getXLen())
1930 return false;
1931
1932 // Fast path, assume 32-bit immediates are cheap.
1933 int64_t Val = Imm.getSExtValue();
1934 if (isInt<32>(Val))
1935 return true;
1936
1937 // A constant pool entry may be more aligned than the load we're trying to
1938 // replace. If we don't support unaligned scalar mem, prefer the constant
1939 // pool.
1940 // TODO: Can the caller pass down the alignment?
1941 if (!Subtarget.enableUnalignedScalarMem())
1942 return true;
1943
1944 // Prefer to keep the load if it would require many instructions.
1945 // This uses the same threshold we use for constant pools but doesn't
1946 // check useConstantPoolForLargeInts.
1947 // TODO: Should we keep the load only when we're definitely going to emit a
1948 // constant pool?
1949
1951 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1952}
1953
1957 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1958 SelectionDAG &DAG) const {
1959 // One interesting pattern that we'd want to form is 'bit extract':
1960 // ((1 >> Y) & 1) ==/!= 0
1961 // But we also need to be careful not to try to reverse that fold.
1962
1963 // Is this '((1 >> Y) & 1)'?
1964 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1965 return false; // Keep the 'bit extract' pattern.
1966
1967 // Will this be '((1 >> Y) & 1)' after the transform?
1968 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1969 return true; // Do form the 'bit extract' pattern.
1970
1971 // If 'X' is a constant, and we transform, then we will immediately
1972 // try to undo the fold, thus causing endless combine loop.
1973 // So only do the transform if X is not a constant. This matches the default
1974 // implementation of this function.
1975 return !XC;
1976}
1977
1978bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1979 switch (Opcode) {
1980 case Instruction::Add:
1981 case Instruction::Sub:
1982 case Instruction::Mul:
1983 case Instruction::And:
1984 case Instruction::Or:
1985 case Instruction::Xor:
1986 case Instruction::FAdd:
1987 case Instruction::FSub:
1988 case Instruction::FMul:
1989 case Instruction::FDiv:
1990 case Instruction::ICmp:
1991 case Instruction::FCmp:
1992 return true;
1993 case Instruction::Shl:
1994 case Instruction::LShr:
1995 case Instruction::AShr:
1996 case Instruction::UDiv:
1997 case Instruction::SDiv:
1998 case Instruction::URem:
1999 case Instruction::SRem:
2000 case Instruction::Select:
2001 return Operand == 1;
2002 default:
2003 return false;
2004 }
2005}
2006
2007
2009 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2010 return false;
2011
2012 if (canSplatOperand(I->getOpcode(), Operand))
2013 return true;
2014
2015 auto *II = dyn_cast<IntrinsicInst>(I);
2016 if (!II)
2017 return false;
2018
2019 switch (II->getIntrinsicID()) {
2020 case Intrinsic::fma:
2021 case Intrinsic::vp_fma:
2022 return Operand == 0 || Operand == 1;
2023 case Intrinsic::vp_shl:
2024 case Intrinsic::vp_lshr:
2025 case Intrinsic::vp_ashr:
2026 case Intrinsic::vp_udiv:
2027 case Intrinsic::vp_sdiv:
2028 case Intrinsic::vp_urem:
2029 case Intrinsic::vp_srem:
2030 case Intrinsic::ssub_sat:
2031 case Intrinsic::vp_ssub_sat:
2032 case Intrinsic::usub_sat:
2033 case Intrinsic::vp_usub_sat:
2034 return Operand == 1;
2035 // These intrinsics are commutative.
2036 case Intrinsic::vp_add:
2037 case Intrinsic::vp_mul:
2038 case Intrinsic::vp_and:
2039 case Intrinsic::vp_or:
2040 case Intrinsic::vp_xor:
2041 case Intrinsic::vp_fadd:
2042 case Intrinsic::vp_fmul:
2043 case Intrinsic::vp_icmp:
2044 case Intrinsic::vp_fcmp:
2045 case Intrinsic::smin:
2046 case Intrinsic::vp_smin:
2047 case Intrinsic::umin:
2048 case Intrinsic::vp_umin:
2049 case Intrinsic::smax:
2050 case Intrinsic::vp_smax:
2051 case Intrinsic::umax:
2052 case Intrinsic::vp_umax:
2053 case Intrinsic::sadd_sat:
2054 case Intrinsic::vp_sadd_sat:
2055 case Intrinsic::uadd_sat:
2056 case Intrinsic::vp_uadd_sat:
2057 // These intrinsics have 'vr' versions.
2058 case Intrinsic::vp_sub:
2059 case Intrinsic::vp_fsub:
2060 case Intrinsic::vp_fdiv:
2061 return Operand == 0 || Operand == 1;
2062 default:
2063 return false;
2064 }
2065}
2066
2067/// Check if sinking \p I's operands to I's basic block is profitable, because
2068/// the operands can be folded into a target instruction, e.g.
2069/// splats of scalars can fold into vector instructions.
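/// For example (illustrative IR):
///   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
///   %sum   = add <4 x i32> %v, %splat
/// Sinking the splat next to the add lets isel fold it and select a vadd.vx
/// with the scalar kept in a GPR instead of materializing a splat vector.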
2071 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2072 using namespace llvm::PatternMatch;
2073
2074 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2075 return false;
2076
2077 // Don't sink splat operands if the target prefers it. Some targets requires
2078 // S2V transfer buffers and we can run out of them copying the same value
2079 // repeatedly.
2080 // FIXME: It could still be worth doing if it would improve vector register
2081 // pressure and prevent a vector spill.
2082 if (!Subtarget.sinkSplatOperands())
2083 return false;
2084
2085 for (auto OpIdx : enumerate(I->operands())) {
2086 if (!canSplatOperand(I, OpIdx.index()))
2087 continue;
2088
2089 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2090 // Make sure we are not already sinking this operand
2091 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2092 continue;
2093
2094 // We are looking for a splat that can be sunk.
2096 m_Undef(), m_ZeroMask())))
2097 continue;
2098
2099 // Don't sink i1 splats.
2100 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2101 continue;
2102
2103 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2104 // and vector registers.
2105 for (Use &U : Op->uses()) {
2106 Instruction *Insn = cast<Instruction>(U.getUser());
2107 if (!canSplatOperand(Insn, U.getOperandNo()))
2108 return false;
2109 }
2110
2111 Ops.push_back(&Op->getOperandUse(0));
2112 Ops.push_back(&OpIdx.value());
2113 }
2114 return true;
2115}
2116
2118 unsigned Opc = VecOp.getOpcode();
2119
2120 // Assume target opcodes can't be scalarized.
2121 // TODO - do we have any exceptions?
2122 if (Opc >= ISD::BUILTIN_OP_END)
2123 return false;
2124
2125 // If the vector op is not supported, try to convert to scalar.
2126 EVT VecVT = VecOp.getValueType();
2127 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2128 return true;
2129
2130 // If the vector op is supported, but the scalar op is not, the transform may
2131 // not be worthwhile.
2132 // Permit a vector binary operation to be converted to a scalar binary
2133 // operation that is custom lowered with an illegal type.
2134 EVT ScalarVT = VecVT.getScalarType();
2135 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2136 isOperationCustom(Opc, ScalarVT);
2137}
2138
2140 const GlobalAddressSDNode *GA) const {
2141 // In order to maximise the opportunity for common subexpression elimination,
2142 // keep a separate ADD node for the global address offset instead of folding
2143 // it in the global address node. Later peephole optimisations may choose to
2144 // fold it back in when profitable.
2145 return false;
2146}
2147
2148// Return one of the followings:
2149// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2150// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2151// positive counterpart, which will be materialized from the first returned
2152 // element. The second returned element indicates that an FNEG should
2153 // follow.
2154// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2155std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2156 EVT VT) const {
2157 if (!Subtarget.hasStdExtZfa())
2158 return std::make_pair(-1, false);
2159
2160 bool IsSupportedVT = false;
2161 if (VT == MVT::f16) {
2162 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2163 } else if (VT == MVT::f32) {
2164 IsSupportedVT = true;
2165 } else if (VT == MVT::f64) {
2166 assert(Subtarget.hasStdExtD() && "Expect D extension");
2167 IsSupportedVT = true;
2168 }
2169
2170 if (!IsSupportedVT)
2171 return std::make_pair(-1, false);
2172
2174 if (Index < 0 && Imm.isNegative())
2175 // Try the combination of its positive counterpart + FNEG.
2176 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2177 else
2178 return std::make_pair(Index, false);
2179}
2180
2182 bool ForCodeSize) const {
2183 bool IsLegalVT = false;
2184 if (VT == MVT::f16)
2185 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2186 else if (VT == MVT::f32)
2187 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2188 else if (VT == MVT::f64)
2189 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2190 else if (VT == MVT::bf16)
2191 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2192
2193 if (!IsLegalVT)
2194 return false;
2195
2196 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2197 return true;
2198
2199 // Cannot create a 64-bit floating-point immediate value for RV32.
2200 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2201 // td can handle +0.0 or -0.0 already.
2202 // -0.0 can be created by fmv + fneg.
2203 return Imm.isZero();
2204 }
2205
2206 // Special case: fmv + fneg
2207 if (Imm.isNegZero())
2208 return true;
2209
2210 // Building an integer and then converting requires a fmv at the end of
2211 // the integer sequence.
2212 const int Cost =
2213 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2214 Subtarget);
2215 return Cost <= FPImmCost;
2216}
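// Rough worked example of the cost check above (absent Zfa, with the default
// FPImmCost of 2): the f32 value 2.0 has bit pattern 0x40000000, which a
// single lui can materialize, so the total cost of 1 (integer sequence) plus
// 1 (final fmv) is accepted, whereas a pattern needing two or more integer
// instructions is rejected and typically ends up as a constant-pool load.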
2217
2218// TODO: This is very conservative.
2220 unsigned Index) const {
2222 return false;
2223
2224 // Only support extracting a fixed-length vector from a fixed-length vector for now.
2225 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2226 return false;
2227
2228 EVT EltVT = ResVT.getVectorElementType();
2229 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2230
2231 // The smallest type we can slide is i8.
2232 // TODO: We can extract index 0 from a mask vector without a slide.
2233 if (EltVT == MVT::i1)
2234 return false;
2235
2236 unsigned ResElts = ResVT.getVectorNumElements();
2237 unsigned SrcElts = SrcVT.getVectorNumElements();
2238
2239 unsigned MinVLen = Subtarget.getRealMinVLen();
2240 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2241
2242 // If we're extracting only data from the first VLEN bits of the source
2243 // then we can always do this with an m1 vslidedown.vx. Restricting the
2244 // Index ensures we can use a vslidedown.vi.
2245 // TODO: We can generalize this when the exact VLEN is known.
2246 if (Index + ResElts <= MinVLMAX && Index < 31)
2247 return true;
2248
2249 // Conservatively only handle extracting half of a vector.
2250 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2251 // a cheap extract. However, this case is important in practice for
2252 // shuffled extracts of longer vectors. How should we resolve this?
2253 if ((ResElts * 2) != SrcElts)
2254 return false;
2255
2256 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2257 // cheap.
2258 if (Index >= 32)
2259 return false;
2260
2261 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2262 // the upper half of a vector until we have more test coverage.
2263 return Index == 0 || Index == ResElts;
2264}
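// Illustrative reading of the rules above, assuming a minimum VLEN of 128
// (MinVLMAX = 4 for i32 elements): extracting v2i32 from v8i32 at index 0 or
// 2 is cheap (it lies within the first VLEN bits), extracting the upper-half
// v4i32 at index 4 is cheap, but extracting v2i32 at index 6 is not, since it
// is neither within the first register nor a half-vector split.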
2265
2268 EVT VT) const {
2269 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2270 // We might still end up using a GPR but that will be decided based on ABI.
2271 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2272 !Subtarget.hasStdExtZfhminOrZhinxmin())
2273 return MVT::f32;
2274
2276
2277 return PartVT;
2278}
2279
2282 EVT VT) const {
2283 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2284 // We might still end up using a GPR but that will be decided based on ABI.
2285 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2286 !Subtarget.hasStdExtZfhminOrZhinxmin())
2287 return 1;
2288
2290}
2291
2293 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2294 unsigned &NumIntermediates, MVT &RegisterVT) const {
2296 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2297
2298 return NumRegs;
2299}
2300
2301// Changes the condition code and swaps operands if necessary, so the SetCC
2302// operation matches one of the comparisons supported directly by branches
2303// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2304// with 1/-1.
2305static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2306 ISD::CondCode &CC, SelectionDAG &DAG) {
2307 // If this is a single bit test that can't be handled by ANDI, shift the
2308 // bit to be tested to the MSB and perform a signed compare with 0.
2309 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2310 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2311 isa<ConstantSDNode>(LHS.getOperand(1))) {
2312 uint64_t Mask = LHS.getConstantOperandVal(1);
2313 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2314 unsigned ShAmt = 0;
2315 if (isPowerOf2_64(Mask)) {
2317 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2318 } else {
2319 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2320 }
2321
2322 LHS = LHS.getOperand(0);
2323 if (ShAmt != 0)
2324 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2325 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2326 return;
2327 }
2328 }
2329
2330 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2331 int64_t C = RHSC->getSExtValue();
2332 switch (CC) {
2333 default: break;
2334 case ISD::SETGT:
2335 // Convert X > -1 to X >= 0.
2336 if (C == -1) {
2337 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2338 CC = ISD::SETGE;
2339 return;
2340 }
2341 break;
2342 case ISD::SETLT:
2343 // Convert X < 1 to 0 >= X.
2344 if (C == 1) {
2345 RHS = LHS;
2346 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2347 CC = ISD::SETGE;
2348 return;
2349 }
2350 break;
2351 }
2352 }
2353
2354 switch (CC) {
2355 default:
2356 break;
2357 case ISD::SETGT:
2358 case ISD::SETLE:
2359 case ISD::SETUGT:
2360 case ISD::SETULE:
2362 std::swap(LHS, RHS);
2363 break;
2364 }
2365}
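// Examples of the rewrites performed above: on RV64, (X & 0x800) == 0 becomes
// a signed (X << 52) >= 0 after shifting the tested bit to the MSB; X > -1
// becomes X >= 0; X < 1 becomes 0 >= X; and codes without a matching branch,
// such as SETGT or SETULE, are handled by swapping the operands.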
2366
2368 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2369 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2370 if (VT.getVectorElementType() == MVT::i1)
2371 KnownSize *= 8;
2372
2373 switch (KnownSize) {
2374 default:
2375 llvm_unreachable("Invalid LMUL.");
2376 case 8:
2378 case 16:
2380 case 32:
2382 case 64:
2384 case 128:
2386 case 256:
2388 case 512:
2390 }
2391}
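// Worked example of the mapping above: nxv1i32 has a known minimum size of 32
// bits and maps to a fractional LMUL of 1/2, nxv2i32 (64 bits) maps to LMUL 1,
// and nxv8i32 (256 bits) maps to LMUL 4. An nxv8i1 mask (8 bits known minimum)
// is scaled by 8 first and is therefore also treated as LMUL 1.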
2392
2394 switch (LMul) {
2395 default:
2396 llvm_unreachable("Invalid LMUL.");
2401 return RISCV::VRRegClassID;
2403 return RISCV::VRM2RegClassID;
2405 return RISCV::VRM4RegClassID;
2407 return RISCV::VRM8RegClassID;
2408 }
2409}
2410
2412 RISCVII::VLMUL LMUL = getLMUL(VT);
2413 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2414 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2415 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2416 LMUL == RISCVII::VLMUL::LMUL_1) {
2417 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2418 "Unexpected subreg numbering");
2419 return RISCV::sub_vrm1_0 + Index;
2420 }
2421 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2422 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2423 "Unexpected subreg numbering");
2424 return RISCV::sub_vrm2_0 + Index;
2425 }
2426 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2427 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm4_0 + Index;
2430 }
2431 llvm_unreachable("Invalid vector type.");
2432}
2433
2435 if (VT.getVectorElementType() == MVT::i1)
2436 return RISCV::VRRegClassID;
2437 return getRegClassIDForLMUL(getLMUL(VT));
2438}
2439
2440// Attempt to decompose a subvector insert/extract between VecVT and
2441// SubVecVT via subregister indices. Returns the subregister index that
2442// can perform the subvector insert/extract with the given element index, as
2443// well as the index corresponding to any leftover subvectors that must be
2444// further inserted/extracted within the register class for SubVecVT.
2445std::pair<unsigned, unsigned>
2447 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2448 const RISCVRegisterInfo *TRI) {
2449 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2450 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2451 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2452 "Register classes not ordered");
2453 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2454 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2455 // Try to compose a subregister index that takes us from the incoming
2456 // LMUL>1 register class down to the outgoing one. At each step we half
2457 // the LMUL:
2458 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2459 // Note that this is not guaranteed to find a subregister index, such as
2460 // when we are extracting from one VR type to another.
2461 unsigned SubRegIdx = RISCV::NoSubRegister;
2462 for (const unsigned RCID :
2463 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2464 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2465 VecVT = VecVT.getHalfNumVectorElementsVT();
2466 bool IsHi =
2467 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2468 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2469 getSubregIndexByMVT(VecVT, IsHi));
2470 if (IsHi)
2471 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2472 }
2473 return {SubRegIdx, InsertExtractIdx};
2474}
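// Completing the example from the comment above: decomposing nxv2i32 out of
// nxv16i32 at element index 12 walks VRM8 -> VRM4 -> VRM2 -> VR, taking the
// high half twice and then the low half, and returns
// {sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0, 0}, i.e. no leftover element
// index remains within the final VR-sized piece.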
2475
2476// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2477// stores for those types.
2478bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2479 return !Subtarget.useRVVForFixedLengthVectors() ||
2480 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2481}
2482
2484 if (!ScalarTy.isSimple())
2485 return false;
2486 switch (ScalarTy.getSimpleVT().SimpleTy) {
2487 case MVT::iPTR:
2488 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2489 case MVT::i8:
2490 case MVT::i16:
2491 case MVT::i32:
2492 return true;
2493 case MVT::i64:
2494 return Subtarget.hasVInstructionsI64();
2495 case MVT::f16:
2496 return Subtarget.hasVInstructionsF16();
2497 case MVT::f32:
2498 return Subtarget.hasVInstructionsF32();
2499 case MVT::f64:
2500 return Subtarget.hasVInstructionsF64();
2501 default:
2502 return false;
2503 }
2504}
2505
2506
2507unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2508 return NumRepeatedDivisors;
2509}
2510
2512 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2513 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2514 "Unexpected opcode");
2515 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2516 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2518 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2519 if (!II)
2520 return SDValue();
2521 return Op.getOperand(II->VLOperand + 1 + HasChain);
2522}
2523
2525 const RISCVSubtarget &Subtarget) {
2526 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2527 if (!Subtarget.useRVVForFixedLengthVectors())
2528 return false;
2529
2530 // We only support a set of vector types with a consistent maximum fixed size
2531 // across all supported vector element types to avoid legalization issues.
2532 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2533 // fixed-length vector type we support is 1024 bytes.
2534 if (VT.getFixedSizeInBits() > 1024 * 8)
2535 return false;
2536
2537 unsigned MinVLen = Subtarget.getRealMinVLen();
2538
2539 MVT EltVT = VT.getVectorElementType();
2540
2541 // Don't use RVV for vectors we cannot scalarize if required.
2542 switch (EltVT.SimpleTy) {
2543 // i1 is supported but has different rules.
2544 default:
2545 return false;
2546 case MVT::i1:
2547 // Masks can only use a single register.
2548 if (VT.getVectorNumElements() > MinVLen)
2549 return false;
2550 MinVLen /= 8;
2551 break;
2552 case MVT::i8:
2553 case MVT::i16:
2554 case MVT::i32:
2555 break;
2556 case MVT::i64:
2557 if (!Subtarget.hasVInstructionsI64())
2558 return false;
2559 break;
2560 case MVT::f16:
2561 if (!Subtarget.hasVInstructionsF16Minimal())
2562 return false;
2563 break;
2564 case MVT::bf16:
2565 if (!Subtarget.hasVInstructionsBF16Minimal())
2566 return false;
2567 break;
2568 case MVT::f32:
2569 if (!Subtarget.hasVInstructionsF32())
2570 return false;
2571 break;
2572 case MVT::f64:
2573 if (!Subtarget.hasVInstructionsF64())
2574 return false;
2575 break;
2576 }
2577
2578 // Reject elements larger than ELEN.
2579 if (EltVT.getSizeInBits() > Subtarget.getELen())
2580 return false;
2581
2582 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2583 // Don't use RVV for types that don't fit.
2584 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2585 return false;
2586
2587 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2588 // the base fixed length RVV support in place.
2589 if (!VT.isPow2VectorType())
2590 return false;
2591
2592 return true;
2593}
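// Illustrative application of the checks above, assuming a minimum VLEN of 128
// and a permissive fixed-length LMUL limit: v8i32 (256 bits) needs LMUL 2 and
// is accepted, v3i32 is rejected for not being a power-of-two vector type, and
// v256i1 is rejected because a mask must fit in a single register (256 > 128).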
2594
2595bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2596 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2597}
2598
2599// Return the largest legal scalable vector type that matches VT's element type.
2601 const RISCVSubtarget &Subtarget) {
2602 // This may be called before legal types are setup.
2603 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2604 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2605 "Expected legal fixed length vector!");
2606
2607 unsigned MinVLen = Subtarget.getRealMinVLen();
2608 unsigned MaxELen = Subtarget.getELen();
2609
2610 MVT EltVT = VT.getVectorElementType();
2611 switch (EltVT.SimpleTy) {
2612 default:
2613 llvm_unreachable("unexpected element type for RVV container");
2614 case MVT::i1:
2615 case MVT::i8:
2616 case MVT::i16:
2617 case MVT::i32:
2618 case MVT::i64:
2619 case MVT::bf16:
2620 case MVT::f16:
2621 case MVT::f32:
2622 case MVT::f64: {
2623 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2624 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2625 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2626 unsigned NumElts =
2628 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2629 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2630 return MVT::getScalableVectorVT(EltVT, NumElts);
2631 }
2632 }
2633}
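// Rough examples of the container mapping above, assuming a minimum VLEN of
// 128 and ELEN of 64: the 128-bit v4i32 maps to the LMUL=1 container nxv2i32,
// the 256-bit v8i32 maps to nxv4i32 (LMUL 2), and the small v4i8 maps to the
// fractional container nxv2i8.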
2634
2636 const RISCVSubtarget &Subtarget) {
2638 Subtarget);
2639}
2640
2642 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2643}
2644
2645// Grow V to consume an entire RVV register.
2647 const RISCVSubtarget &Subtarget) {
2648 assert(VT.isScalableVector() &&
2649 "Expected to convert into a scalable vector!");
2650 assert(V.getValueType().isFixedLengthVector() &&
2651 "Expected a fixed length vector operand!");
2652 SDLoc DL(V);
2653 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2654 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2655}
2656
2657// Shrink V so it's just big enough to maintain a VT's worth of data.
2659 const RISCVSubtarget &Subtarget) {
2661 "Expected to convert into a fixed length vector!");
2662 assert(V.getValueType().isScalableVector() &&
2663 "Expected a scalable vector operand!");
2664 SDLoc DL(V);
2665 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2666 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2667}
2668
2669/// Return the mask type suitable for masking the provided vector type.
2670/// This is simply an i1 element type vector of the same
2671/// (possibly scalable) length.
2672static MVT getMaskTypeFor(MVT VecVT) {
2673 assert(VecVT.isVector());
2675 return MVT::getVectorVT(MVT::i1, EC);
2676}
2677
2678/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2679/// vector length VL.
2680static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2681 SelectionDAG &DAG) {
2682 MVT MaskVT = getMaskTypeFor(VecVT);
2683 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2684}
2685
2686static std::pair<SDValue, SDValue>
2688 const RISCVSubtarget &Subtarget) {
2689 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2690 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2691 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2692 return {Mask, VL};
2693}
2694
2695static std::pair<SDValue, SDValue>
2696getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2697 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2698 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2699 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2700 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2701 return {Mask, VL};
2702}
2703
2704// Gets the two common "VL" operands: an all-ones mask and the vector length.
2705// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2706// fixed-length, ContainerVT is the scalable vector type it is contained in;
2707// otherwise, if VecVT is scalable, ContainerVT should be the same as VecVT.
2708static std::pair<SDValue, SDValue>
2709getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2710 const RISCVSubtarget &Subtarget) {
2711 if (VecVT.isFixedLengthVector())
2712 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2713 Subtarget);
2714 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2715 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2716}
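// Note on the helpers above: for a scalable type the VL operand is the X0
// register, which requests VLMAX, while for a fixed-length type the VL is the
// exact element count of the original fixed vector; in both cases the mask is
// an all-ones VMSET_VL of the container's i1 mask type.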
2717
2719 SelectionDAG &DAG) const {
2720 assert(VecVT.isScalableVector() && "Expected scalable vector");
2721 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2722 VecVT.getVectorElementCount());
2723}
2724
2725std::pair<unsigned, unsigned>
2727 const RISCVSubtarget &Subtarget) {
2728 assert(VecVT.isScalableVector() && "Expected scalable vector");
2729
2730 unsigned EltSize = VecVT.getScalarSizeInBits();
2731 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2732
2733 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2734 unsigned MaxVLMAX =
2735 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2736
2737 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2738 unsigned MinVLMAX =
2739 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2740
2741 return std::make_pair(MinVLMAX, MaxVLMAX);
2742}
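// Worked example of the bounds above: for nxv4i32 (EltSize 32, minimum size
// 128 bits, i.e. LMUL 2) on a subtarget with Zvl128b and a real maximum VLEN
// of 512, MinVLMAX = 128 * 2 / 32 = 8 and MaxVLMAX = 512 * 2 / 32 = 32.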
2743
2744// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2745// of either is (currently) supported. This can get us into an infinite loop
2746// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2747// as a ..., etc.
2748// Until either (or both) of these can reliably lower any node, reporting that
2749// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2750// the infinite loop. Note that this means BUILD_VECTOR may then be lowered
2751// through the stack, which is not desirable.
2753 EVT VT, unsigned DefinedValues) const {
2754 return false;
2755}
2756
2758 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2759 // implementation-defined.
2760 if (!VT.isVector())
2762 unsigned DLenFactor = Subtarget.getDLenFactor();
2763 unsigned Cost;
2764 if (VT.isScalableVector()) {
2765 unsigned LMul;
2766 bool Fractional;
2767 std::tie(LMul, Fractional) =
2769 if (Fractional)
2770 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2771 else
2772 Cost = (LMul * DLenFactor);
2773 } else {
2774 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2775 }
2776 return Cost;
2777}
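// Illustrative costs from the model above with DLenFactor == 2 (DLEN equal to
// VLEN/2): an LMUL=4 scalable type costs 4 * 2 = 8, an LMUL=1 type costs 2,
// and a fractional LMUL=1/2 type costs 2 / 2 = 1. A 256-bit fixed-length type
// on a VLEN=128 subtarget likewise costs ceil(256 / 64) = 4.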
2778
2779
2780/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2781/// is generally quadratic in the number of vregs implied by LMUL. Note that
2782/// the operands (index and possibly mask) are handled separately.
2784 return getLMULCost(VT) * getLMULCost(VT);
2785}
2786
2787/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2788/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2789/// or may track the vrgather.vv cost. It is implementation-dependent.
2791 return getLMULCost(VT);
2792}
2793
2794/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2795/// for the type VT. (This does not cover the vslide1up or vslide1down
2796/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2797/// or may track the vrgather.vv cost. It is implementation-dependent.
2799 return getLMULCost(VT);
2800}
2801
2802/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2803/// for the type VT. (This does not cover the vslide1up or vslide1down
2804/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2805/// or may track the vrgather.vv cost. It is implementation-dependent.
2807 return getLMULCost(VT);
2808}
2809
2811 const RISCVSubtarget &Subtarget) {
2812 // RISC-V FP-to-int conversions saturate to the destination register size, but
2813 // don't produce 0 for nan. We can use a conversion instruction and fix the
2814 // nan case with a compare and a select.
2815 SDValue Src = Op.getOperand(0);
2816
2817 MVT DstVT = Op.getSimpleValueType();
2818 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2819
2820 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2821
2822 if (!DstVT.isVector()) {
2823 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2824 // the result.
2825 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2826 Src.getValueType() == MVT::bf16) {
2827 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2828 }
2829
2830 unsigned Opc;
2831 if (SatVT == DstVT)
2832 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2833 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2835 else
2836 return SDValue();
2837 // FIXME: Support other SatVTs by clamping before or after the conversion.
2838
2839 SDLoc DL(Op);
2840 SDValue FpToInt = DAG.getNode(
2841 Opc, DL, DstVT, Src,
2843
2844 if (Opc == RISCVISD::FCVT_WU_RV64)
2845 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2846
2847 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2848 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2850 }
2851
2852 // Vectors.
2853
2854 MVT DstEltVT = DstVT.getVectorElementType();
2855 MVT SrcVT = Src.getSimpleValueType();
2856 MVT SrcEltVT = SrcVT.getVectorElementType();
2857 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2858 unsigned DstEltSize = DstEltVT.getSizeInBits();
2859
2860 // Only handle saturating to the destination type.
2861 if (SatVT != DstEltVT)
2862 return SDValue();
2863
2864 MVT DstContainerVT = DstVT;
2865 MVT SrcContainerVT = SrcVT;
2866 if (DstVT.isFixedLengthVector()) {
2867 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2868 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2869 assert(DstContainerVT.getVectorElementCount() ==
2870 SrcContainerVT.getVectorElementCount() &&
2871 "Expected same element count");
2872 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2873 }
2874
2875 SDLoc DL(Op);
2876
2877 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2878
2879 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2880 {Src, Src, DAG.getCondCode(ISD::SETNE),
2881 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2882
2883 // If we need to widen by more than one step, promote the FP type, then do
2884 // a widening convert.
2885 if (DstEltSize > (2 * SrcEltSize)) {
2886 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2887 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2888 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2889 }
2890
2891 MVT CvtContainerVT = DstContainerVT;
2892 MVT CvtEltVT = DstEltVT;
2893 if (SrcEltSize > (2 * DstEltSize)) {
2894 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2895 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2896 }
2897
2898 unsigned RVVOpc =
2900 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2901
2902 while (CvtContainerVT != DstContainerVT) {
2903 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2904 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2905 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2906 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2908 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2909 }
2910
2911 SDValue SplatZero = DAG.getNode(
2912 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2913 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2914 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2915 Res, DAG.getUNDEF(DstContainerVT), VL);
2916
2917 if (DstVT.isFixedLengthVector())
2918 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2919
2920 return Res;
2921}
2922
2924 switch (Opc) {
2925 case ISD::FROUNDEVEN:
2927 case ISD::VP_FROUNDEVEN:
2928 return RISCVFPRndMode::RNE;
2929 case ISD::FTRUNC:
2930 case ISD::STRICT_FTRUNC:
2931 case ISD::VP_FROUNDTOZERO:
2932 return RISCVFPRndMode::RTZ;
2933 case ISD::FFLOOR:
2934 case ISD::STRICT_FFLOOR:
2935 case ISD::VP_FFLOOR:
2936 return RISCVFPRndMode::RDN;
2937 case ISD::FCEIL:
2938 case ISD::STRICT_FCEIL:
2939 case ISD::VP_FCEIL:
2940 return RISCVFPRndMode::RUP;
2941 case ISD::FROUND:
2942 case ISD::STRICT_FROUND:
2943 case ISD::VP_FROUND:
2944 return RISCVFPRndMode::RMM;
2945 case ISD::FRINT:
2946 return RISCVFPRndMode::DYN;
2947 }
2948
2950}
2951
2952// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2953// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2954// the integer domain and back, taking care to avoid converting values that are
2955// NaN or already correct.
2956static SDValue
2958 const RISCVSubtarget &Subtarget) {
2959 MVT VT = Op.getSimpleValueType();
2960 assert(VT.isVector() && "Unexpected type");
2961
2962 SDLoc DL(Op);
2963
2964 SDValue Src = Op.getOperand(0);
2965
2966 MVT ContainerVT = VT;
2967 if (VT.isFixedLengthVector()) {
2968 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2969 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2970 }
2971
2972 SDValue Mask, VL;
2973 if (Op->isVPOpcode()) {
2974 Mask = Op.getOperand(1);
2975 if (VT.isFixedLengthVector())
2976 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2977 Subtarget);
2978 VL = Op.getOperand(2);
2979 } else {
2980 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2981 }
2982
2983 // Freeze the source since we are increasing the number of uses.
2984 Src = DAG.getFreeze(Src);
2985
2986 // We do the conversion on the absolute value and fix the sign at the end.
2987 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2988
2989 // Determine the largest integer that can be represented exactly. This and
2990 // values larger than it don't have any fractional bits so don't need to
2991 // be converted.
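  // For example, f32 has a precision of 24 bits, so MaxVal below is 2^23;
  // every value with magnitude >= 2^23 has a ulp of at least 1 and is already
  // an integer, which is why such lanes are masked off from the conversion.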
2992 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
2993 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2994 APFloat MaxVal = APFloat(FltSem);
2995 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2996 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2997 SDValue MaxValNode =
2998 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2999 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3000 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3001
3002 // If abs(Src) was larger than MaxVal or nan, keep it.
3003 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3004 Mask =
3005 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3006 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3007 Mask, Mask, VL});
3008
3009 // Truncate to integer and convert back to FP.
3010 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3011 MVT XLenVT = Subtarget.getXLenVT();
3012 SDValue Truncated;
3013
3014 switch (Op.getOpcode()) {
3015 default:
3016 llvm_unreachable("Unexpected opcode");
3017 case ISD::FCEIL:
3018 case ISD::VP_FCEIL:
3019 case ISD::FFLOOR:
3020 case ISD::VP_FFLOOR:
3021 case ISD::FROUND:
3022 case ISD::FROUNDEVEN:
3023 case ISD::VP_FROUND:
3024 case ISD::VP_FROUNDEVEN:
3025 case ISD::VP_FROUNDTOZERO: {
3028 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3029 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3030 break;
3031 }
3032 case ISD::FTRUNC:
3033 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3034 Mask, VL);
3035 break;
3036 case ISD::FRINT:
3037 case ISD::VP_FRINT:
3038 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3039 break;
3040 case ISD::FNEARBYINT:
3041 case ISD::VP_FNEARBYINT:
3042 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3043 Mask, VL);
3044 break;
3045 }
3046
3047 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3048 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3049 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3050 Mask, VL);
3051
3052 // Restore the original sign so that -0.0 is preserved.
3053 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3054 Src, Src, Mask, VL);
3055
3056 if (!VT.isFixedLengthVector())
3057 return Truncated;
3058
3059 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3060}
3061
3062// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3063// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3064// qNaNs and converting the new source to integer and back to FP.
3065static SDValue
3067 const RISCVSubtarget &Subtarget) {
3068 SDLoc DL(Op);
3069 MVT VT = Op.getSimpleValueType();
3070 SDValue Chain = Op.getOperand(0);
3071 SDValue Src = Op.getOperand(1);
3072
3073 MVT ContainerVT = VT;
3074 if (VT.isFixedLengthVector()) {
3075 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3076 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3077 }
3078
3079 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3080
3081 // Freeze the source since we are increasing the number of uses.
3082 Src = DAG.getFreeze(Src);
3083
3084 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3085 MVT MaskVT = Mask.getSimpleValueType();
3087 DAG.getVTList(MaskVT, MVT::Other),
3088 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3089 DAG.getUNDEF(MaskVT), Mask, VL});
3090 Chain = Unorder.getValue(1);
3092 DAG.getVTList(ContainerVT, MVT::Other),
3093 {Chain, Src, Src, Src, Unorder, VL});
3094 Chain = Src.getValue(1);
3095
3096 // We do the conversion on the absolute value and fix the sign at the end.
3097 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3098
3099 // Determine the largest integer that can be represented exactly. This and
3100 // values larger than it don't have any fractional bits so don't need to
3101 // be converted.
3102 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3103 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3104 APFloat MaxVal = APFloat(FltSem);
3105 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3106 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3107 SDValue MaxValNode =
3108 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3109 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3110 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3111
3112 // If abs(Src) was larger than MaxVal or nan, keep it.
3113 Mask = DAG.getNode(
3114 RISCVISD::SETCC_VL, DL, MaskVT,
3115 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3116
3117 // Truncate to integer and convert back to FP.
3118 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3119 MVT XLenVT = Subtarget.getXLenVT();
3120 SDValue Truncated;
3121
3122 switch (Op.getOpcode()) {
3123 default:
3124 llvm_unreachable("Unexpected opcode");
3125 case ISD::STRICT_FCEIL:
3126 case ISD::STRICT_FFLOOR:
3127 case ISD::STRICT_FROUND:
3131 Truncated = DAG.getNode(
3132 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3133 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3134 break;
3135 }
3136 case ISD::STRICT_FTRUNC:
3137 Truncated =
3139 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3140 break;
3143 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3144 Mask, VL);
3145 break;
3146 }
3147 Chain = Truncated.getValue(1);
3148
3149 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3150 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3151 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3152 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3153 Truncated, Mask, VL);
3154 Chain = Truncated.getValue(1);
3155 }
3156
3157 // Restore the original sign so that -0.0 is preserved.
3158 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3159 Src, Src, Mask, VL);
3160
3161 if (VT.isFixedLengthVector())
3162 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3163 return DAG.getMergeValues({Truncated, Chain}, DL);
3164}
3165
3166static SDValue
3168 const RISCVSubtarget &Subtarget) {
3169 MVT VT = Op.getSimpleValueType();
3170 if (VT.isVector())
3171 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3172
3173 if (DAG.shouldOptForSize())
3174 return SDValue();
3175
3176 SDLoc DL(Op);
3177 SDValue Src = Op.getOperand(0);
3178
3179 // Create an integer the size of the mantissa with the MSB set. This and all
3180 // values larger than it don't have any fractional bits so don't need to be
3181 // converted.
3182 const fltSemantics &FltSem = VT.getFltSemantics();
3183 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3184 APFloat MaxVal = APFloat(FltSem);
3185 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3186 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3187 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3188
3190 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3191 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3192}
3193
3194// Expand vector LRINT and LLRINT by converting to the integer domain.
3196 const RISCVSubtarget &Subtarget) {
3197 MVT VT = Op.getSimpleValueType();
3198 assert(VT.isVector() && "Unexpected type");
3199
3200 SDLoc DL(Op);
3201 SDValue Src = Op.getOperand(0);
3202 MVT ContainerVT = VT;
3203
3204 if (VT.isFixedLengthVector()) {
3205 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3206 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3207 }
3208
3209 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3210 SDValue Truncated =
3211 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3212
3213 if (!VT.isFixedLengthVector())
3214 return Truncated;
3215
3216 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3217}
3218
3219static SDValue
3221 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3222 SDValue Offset, SDValue Mask, SDValue VL,
3224 if (Passthru.isUndef())
3226 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3227 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3228 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3229}
3230
3231static SDValue
3232getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3233 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3234 SDValue VL,
3236 if (Passthru.isUndef())
3238 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3239 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3240 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3241}
3242
3243static MVT getLMUL1VT(MVT VT) {
3245 "Unexpected vector MVT");
3249}
3250
3254 int64_t Addend;
3255};
3256
3257static std::optional<APInt> getExactInteger(const APFloat &APF,
3259 // We will use a SINT_TO_FP to materialize this constant so we should use a
3260 // signed APSInt here.
3261 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3262 // We use an arbitrary rounding mode here. If a floating-point is an exact
3263 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3264 // the rounding mode changes the output value, then it is not an exact
3265 // integer.
3267 bool IsExact;
3268 // If it is out of signed integer range, it will return an invalid operation.
3269 // If it is not an exact integer, IsExact is false.
3270 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3272 !IsExact)
3273 return std::nullopt;
3274 return ValInt.extractBits(BitWidth, 0);
3275}
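// For instance, getExactInteger(3.0, 8) yields 3, getExactInteger(-2.0, 8)
// yields the 8-bit two's-complement encoding of -2, while 0.5 (not an
// integer) and 300.0 (outside the signed 8-bit range) both yield
// std::nullopt.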
3276
3277// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3278// to the (non-zero) step S and start value X. This can be then lowered as the
3279// RVV sequence (VID * S) + X, for example.
3280// The step S is represented as an integer numerator divided by a positive
3281// denominator. Note that the implementation currently only identifies
3282// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3283// cannot detect 2/3, for example.
3284// Note that this method will also match potentially unappealing index
3285// sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3286// determine whether this is worth generating code for.
3287//
3288// EltSizeInBits is the size of the type that the sequence will be calculated
3289// in, i.e. SEW for build_vectors or XLEN for address calculations.
3290static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3291 unsigned EltSizeInBits) {
3292 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3293 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3294 return std::nullopt;
3295 bool IsInteger = Op.getValueType().isInteger();
3296
3297 std::optional<unsigned> SeqStepDenom;
3298 std::optional<APInt> SeqStepNum;
3299 std::optional<APInt> SeqAddend;
3300 std::optional<std::pair<APInt, unsigned>> PrevElt;
3301 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3302
3303 // First extract the ops into a list of constant integer values. This may not
3304 // be possible for floats if they're not all representable as integers.
3306 const unsigned OpSize = Op.getScalarValueSizeInBits();
3307 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3308 if (Elt.isUndef()) {
3309 Elts[Idx] = std::nullopt;
3310 continue;
3311 }
3312 if (IsInteger) {
3313 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3314 } else {
3315 auto ExactInteger =
3316 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3317 if (!ExactInteger)
3318 return std::nullopt;
3319 Elts[Idx] = *ExactInteger;
3320 }
3321 }
3322
3323 for (auto [Idx, Elt] : enumerate(Elts)) {
3324 // Assume undef elements match the sequence; we just have to be careful
3325 // when interpolating across them.
3326 if (!Elt)
3327 continue;
3328
3329 if (PrevElt) {
3330 // Calculate the step since the last non-undef element, and ensure
3331 // it's consistent across the entire sequence.
3332 unsigned IdxDiff = Idx - PrevElt->second;
3333 APInt ValDiff = *Elt - PrevElt->first;
3334
3335 // A zero value difference means that we're somewhere in the middle
3336 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3337 // step change before evaluating the sequence.
3338 if (ValDiff == 0)
3339 continue;
3340
3341 int64_t Remainder = ValDiff.srem(IdxDiff);
3342 // Normalize the step if it's greater than 1.
3343 if (Remainder != ValDiff.getSExtValue()) {
3344 // The difference must cleanly divide the element span.
3345 if (Remainder != 0)
3346 return std::nullopt;
3347 ValDiff = ValDiff.sdiv(IdxDiff);
3348 IdxDiff = 1;
3349 }
3350
3351 if (!SeqStepNum)
3352 SeqStepNum = ValDiff;
3353 else if (ValDiff != SeqStepNum)
3354 return std::nullopt;
3355
3356 if (!SeqStepDenom)
3357 SeqStepDenom = IdxDiff;
3358 else if (IdxDiff != *SeqStepDenom)
3359 return std::nullopt;
3360 }
3361
3362 // Record this non-undef element for later.
3363 if (!PrevElt || PrevElt->first != *Elt)
3364 PrevElt = std::make_pair(*Elt, Idx);
3365 }
3366
3367 // We need to have logged a step for this to count as a legal index sequence.
3368 if (!SeqStepNum || !SeqStepDenom)
3369 return std::nullopt;
3370
3371 // Loop back through the sequence and validate elements we might have skipped
3372 // while waiting for a valid step. While doing this, log any sequence addend.
3373 for (auto [Idx, Elt] : enumerate(Elts)) {
3374 if (!Elt)
3375 continue;
3376 APInt ExpectedVal =
3377 (APInt(EltSizeInBits, Idx) * *SeqStepNum).sdiv(*SeqStepDenom);
3378
3379 APInt Addend = *Elt - ExpectedVal;
3380 if (!SeqAddend)
3381 SeqAddend = Addend;
3382 else if (Addend != SeqAddend)
3383 return std::nullopt;
3384 }
3385
3386 assert(SeqAddend && "Must have an addend if we have a step");
3387
3388 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3389 SeqAddend->getSExtValue()};
3390}
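// For example, the constant vector <0, 2, 4, 6> is recognised as
// {StepNumerator = 2, StepDenominator = 1, Addend = 0}, and <1, 1, 2, 2> as
// {1, 2, 1}, which the BUILD_VECTOR lowering below turns into (vid >> 1) + 1
// since the power-of-two denominator becomes a shift.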
3391
3392// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3393// and lower it as a VRGATHER_VX_VL from the source vector.
3394static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3395 SelectionDAG &DAG,
3396 const RISCVSubtarget &Subtarget) {
3397 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3398 return SDValue();
3399 SDValue Vec = SplatVal.getOperand(0);
3400 // Only perform this optimization on vectors of the same size for simplicity.
3401 // Don't perform this optimization for i1 vectors.
3402 // FIXME: Support i1 vectors, maybe by promoting to i8?
3403 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3404 return SDValue();
3405 SDValue Idx = SplatVal.getOperand(1);
3406 // The index must be a legal type.
3407 if (Idx.getValueType() != Subtarget.getXLenVT())
3408 return SDValue();
3409
3410 MVT ContainerVT = VT;
3411 if (VT.isFixedLengthVector()) {
3412 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3413 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3414 }
3415
3416 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3417
3418 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3419 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3420
3421 if (!VT.isFixedLengthVector())
3422 return Gather;
3423
3424 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3425}
3426
3427
3428/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3429/// which constitute a large proportion of the elements. In such cases we can
3430/// splat a vector with the dominant element and make up the shortfall with
3431/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3432/// Note that this includes vectors of 2 elements by association. The
3433/// upper-most element is the "dominant" one, allowing us to use a splat to
3434/// "insert" the upper element, and an insert of the lower element at position
3435/// 0, which improves codegen.
3437 const RISCVSubtarget &Subtarget) {
3438 MVT VT = Op.getSimpleValueType();
3439 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3440
3441 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3442
3443 SDLoc DL(Op);
3444 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3445
3446 MVT XLenVT = Subtarget.getXLenVT();
3447 unsigned NumElts = Op.getNumOperands();
3448
3449 SDValue DominantValue;
3450 unsigned MostCommonCount = 0;
3451 DenseMap<SDValue, unsigned> ValueCounts;
3452 unsigned NumUndefElts =
3453 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3454
3455 // Track the number of scalar loads we know we'd be inserting, estimated as
3456 // any non-zero floating-point constant. Other kinds of elements are either
3457 // already in registers or are materialized on demand. The threshold at which
3458 // a vector load is more desirable than several scalar materialization and
3459 // vector-insertion instructions is not known.
3460 unsigned NumScalarLoads = 0;
3461
3462 for (SDValue V : Op->op_values()) {
3463 if (V.isUndef())
3464 continue;
3465
3466 ValueCounts.insert(std::make_pair(V, 0));
3467 unsigned &Count = ValueCounts[V];
3468 if (0 == Count)
3469 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3470 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3471
3472 // Is this value dominant? In case of a tie, prefer the highest element as
3473 // it's cheaper to insert near the beginning of a vector than it is at the
3474 // end.
3475 if (++Count >= MostCommonCount) {
3476 DominantValue = V;
3477 MostCommonCount = Count;
3478 }
3479 }
3480
3481 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3482 unsigned NumDefElts = NumElts - NumUndefElts;
3483 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3484
3485 // Don't perform this optimization when optimizing for size, since
3486 // materializing elements and inserting them tends to cause code bloat.
3487 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3488 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3489 ((MostCommonCount > DominantValueCountThreshold) ||
3490 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3491 // Start by splatting the most common element.
3492 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3493
3494 DenseSet<SDValue> Processed{DominantValue};
3495
3496 // We can handle an insert into the last element (of a splat) via
3497 // v(f)slide1down. This is slightly better than the vslideup insert
3498 // lowering as it avoids the need for a vector group temporary. It
3499 // is also better than using vmerge.vx as it avoids the need to
3500 // materialize the mask in a vector register.
3501 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3502 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3503 LastOp != DominantValue) {
3504 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3505 auto OpCode =
3507 if (!VT.isFloatingPoint())
3508 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3509 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3510 LastOp, Mask, VL);
3511 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3512 Processed.insert(LastOp);
3513 }
3514
3515 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3516 for (const auto &OpIdx : enumerate(Op->ops())) {
3517 const SDValue &V = OpIdx.value();
3518 if (V.isUndef() || !Processed.insert(V).second)
3519 continue;
3520 if (ValueCounts[V] == 1) {
3521 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3522 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3523 } else {
3524 // Blend in all instances of this value using a VSELECT, using a
3525 // mask where each bit signals whether that element is the one
3526 // we're after.
3528 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3529 return DAG.getConstant(V == V1, DL, XLenVT);
3530 });
3531 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3532 DAG.getBuildVector(SelMaskTy, DL, Ops),
3533 DAG.getSplatBuildVector(VT, DL, V), Vec);
3534 }
3535 }
3536
3537 return Vec;
3538 }
3539
3540 return SDValue();
3541}
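// Illustrative shape of the lowering above: for v4i32 <3, 3, 3, 7> the
// dominant value 3 is splatted first and the single trailing 7 is inserted
// via the v(f)slide1down path; an element that repeats but is not dominant
// would instead be blended in with a VSELECT against the splat.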
3542
3544 const RISCVSubtarget &Subtarget) {
3545 MVT VT = Op.getSimpleValueType();
3546 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3547
3548 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3549
3550 SDLoc DL(Op);
3551 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3552
3553 MVT XLenVT = Subtarget.getXLenVT();
3554 unsigned NumElts = Op.getNumOperands();
3555
3556 if (VT.getVectorElementType() == MVT::i1) {
3557 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3558 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3559 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3560 }
3561
3562 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3563 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3564 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3565 }
3566
3567 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3568 // scalar integer chunks whose bit-width depends on the number of mask
3569 // bits and XLEN.
3570 // First, determine the most appropriate scalar integer type to use. This
3571 // is at most XLenVT, but may be shrunk to a smaller vector element type
3572 // according to the size of the final vector - use i8 chunks rather than
3573 // XLenVT if we're producing a v8i1. This results in more consistent
3574 // codegen across RV32 and RV64.
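    // As a concrete example, the v8i1 mask <1,0,1,1,0,0,0,1> is packed with
    // element 0 going to bit 0 and so on, giving the i8 chunk 0b10001101
    // (0x8d), which is then built as a v1i8 vector and bitcast back to v8i1.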
3575 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3576 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3577 // If we have to use more than one INSERT_VECTOR_ELT then this
3578 // optimization is likely to increase code size; avoid performing it in
3579 // such a case. We can use a load from a constant pool in this case.
3580 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3581 return SDValue();
3582 // Now we can create our integer vector type. Note that it may be larger
3583 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3584 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3585 MVT IntegerViaVecVT =
3586 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3587 IntegerViaVecElts);
3588
3589 uint64_t Bits = 0;
3590 unsigned BitPos = 0, IntegerEltIdx = 0;
3591 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3592
3593 for (unsigned I = 0; I < NumElts;) {
3594 SDValue V = Op.getOperand(I);
3595 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3596 Bits |= ((uint64_t)BitValue << BitPos);
3597 ++BitPos;
3598 ++I;
3599
3600 // Once we accumulate enough bits to fill our scalar type or process the
3601 // last element, insert into our vector and clear our accumulated data.
3602 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3603 if (NumViaIntegerBits <= 32)
3604 Bits = SignExtend64<32>(Bits);
3605 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3606 Elts[IntegerEltIdx] = Elt;
3607 Bits = 0;
3608 BitPos = 0;
3609 IntegerEltIdx++;
3610 }
3611 }
3612
3613 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3614
3615 if (NumElts < NumViaIntegerBits) {
3616 // If we're producing a smaller vector than our minimum legal integer
3617 // type, bitcast to the equivalent (known-legal) mask type, and extract
3618 // our final mask.
3619 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3620 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3621 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3622 DAG.getConstant(0, DL, XLenVT));
3623 } else {
3624 // Else we must have produced an integer type with the same size as the
3625 // mask type; bitcast for the final result.
3626 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3627 Vec = DAG.getBitcast(VT, Vec);
3628 }
3629
3630 return Vec;
3631 }
3632
3633 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3634 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3636 if (!VT.isFloatingPoint())
3637 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3638 Splat =
3639 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3640 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3641 }
3642
3643 // Try and match index sequences, which we can lower to the vid instruction
3644 // with optional modifications. An all-undef vector is matched by
3645 // getSplatValue, above.
3646 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3647 int64_t StepNumerator = SimpleVID->StepNumerator;
3648 unsigned StepDenominator = SimpleVID->StepDenominator;
3649 int64_t Addend = SimpleVID->Addend;
3650
3651 assert(StepNumerator != 0 && "Invalid step");
3652 bool Negate = false;
3653 int64_t SplatStepVal = StepNumerator;
3654 unsigned StepOpcode = ISD::MUL;
3655 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3656 // anyway as the shift of 63 won't fit in uimm5.
3657 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3658 isPowerOf2_64(std::abs(StepNumerator))) {
3659 Negate = StepNumerator < 0;
3660 StepOpcode = ISD::SHL;
3661 SplatStepVal = Log2_64(std::abs(StepNumerator));
3662 }
3663
3664 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3665 // threshold since it's the immediate value many RVV instructions accept.
3666 // There is no vmul.vi instruction, so ensure the multiply constant can fit
3667 // in a single addi instruction.
3668 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3669 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3670 isPowerOf2_32(StepDenominator) &&
3671 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3672 MVT VIDVT =
3674 MVT VIDContainerVT =
3675 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3676 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3677 // Convert right out of the scalable type so we can use standard ISD
3678 // nodes for the rest of the computation. If we used scalable types with
3679 // these, we'd lose the fixed-length vector info and generate worse
3680 // vsetvli code.
3681 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3682 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3683 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3684 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3685 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3686 }
3687 if (StepDenominator != 1) {
3688 SDValue SplatStep =
3689 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3690 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3691 }
3692 if (Addend != 0 || Negate) {
3693 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3694 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3695 VID);
3696 }
3697 if (VT.isFloatingPoint()) {
3698 // TODO: Use vfwcvt to reduce register pressure.
3699 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3700 }
3701 return VID;
3702 }
3703 }
3704
3705 // For very small build_vectors, use a single scalar insert of a constant.
3706 // TODO: Base this on constant rematerialization cost, not size.
3707 const unsigned EltBitSize = VT.getScalarSizeInBits();
3708 if (VT.getSizeInBits() <= 32 &&
3710 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3711 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3712 "Unexpected sequence type");
3713 // If we can use the original VL with the modified element type, this
3714 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3715 // be moved into InsertVSETVLI?
3716 unsigned ViaVecLen =
3717 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3718 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3719
3720 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3721 uint64_t SplatValue = 0;
3722 // Construct the amalgamated value at this larger vector type.
3723 for (const auto &OpIdx : enumerate(Op->op_values())) {
3724 const auto &SeqV = OpIdx.value();
3725 if (!SeqV.isUndef())
3726 SplatValue |=
3727 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3728 }
3729
3730 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3731 // achieve better constant materialization.
3732 // On RV32, we need to sign-extend to use getSignedConstant.
3733 if (ViaIntVT == MVT::i32)
3734 SplatValue = SignExtend64<32>(SplatValue);
3735
3736 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3737 DAG.getUNDEF(ViaVecVT),
3738 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3739 DAG.getVectorIdxConstant(0, DL));
3740 if (ViaVecLen != 1)
3742 MVT::getVectorVT(ViaIntVT, 1), Vec,
3743 DAG.getConstant(0, DL, XLenVT));
3744 return DAG.getBitcast(VT, Vec);
3745 }
3746
3747
3748 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3749 // when re-interpreted as a vector with a larger element type. For example,
3750 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3751// could instead be splat as
3752 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3753 // TODO: This optimization could also work on non-constant splats, but it
3754 // would require bit-manipulation instructions to construct the splat value.
3755 SmallVector<SDValue> Sequence;
3756 const auto *BV = cast<BuildVectorSDNode>(Op);
3757 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3759 BV->getRepeatedSequence(Sequence) &&
3760 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3761 unsigned SeqLen = Sequence.size();
3762 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3763 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3764 ViaIntVT == MVT::i64) &&
3765 "Unexpected sequence type");
3766
3767 // If we can use the original VL with the modified element type, this
3768 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3769 // be moved into InsertVSETVLI?
3770 const unsigned RequiredVL = NumElts / SeqLen;
3771 const unsigned ViaVecLen =
3772 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3773 NumElts : RequiredVL;
3774 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3775
3776 unsigned EltIdx = 0;
3777 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3778 uint64_t SplatValue = 0;
3779 // Construct the amalgamated value which can be splatted as this larger
3780 // vector type.
3781 for (const auto &SeqV : Sequence) {
3782 if (!SeqV.isUndef())
3783 SplatValue |=
3784 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3785 EltIdx++;
3786 }
3787
3788 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3789 // achieve better constant materialization.
3790 // On RV32, we need to sign-extend to use getSignedConstant.
3791 if (ViaIntVT == MVT::i32)
3792 SplatValue = SignExtend64<32>(SplatValue);
3793
3794 // Since we can't introduce illegal i64 types at this stage, we can only
3795 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3796 // way we can use RVV instructions to splat.
3797 assert((ViaIntVT.bitsLE(XLenVT) ||
3798 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3799 "Unexpected bitcast sequence");
3800 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3801 SDValue ViaVL =
3802 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3803 MVT ViaContainerVT =
3804 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3805 SDValue Splat =
3806 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3807 DAG.getUNDEF(ViaContainerVT),
3808 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3809 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3810 if (ViaVecLen != RequiredVL)
3811 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3812 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3813 DAG.getConstant(0, DL, XLenVT));
3814 return DAG.getBitcast(VT, Splat);
3815 }
3816 }
3817
3818 // If the number of signbits allows, see if we can lower as a <N x i8>.
3819 // Our main goal here is to reduce LMUL (and thus work) required to
3820 // build the constant, but we will also narrow if the resulting
3821 // narrow vector is known to materialize cheaply.
3822 // TODO: We really should be costing the smaller vector. There are
3823 // profitable cases this misses.
3824 if (EltBitSize > 8 && VT.isInteger() &&
3825 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3826 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3827 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3828 DL, Op->ops());
3829 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3830 Source, DAG, Subtarget);
3831 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3832 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3833 }
3834
3835 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3836 return Res;
3837
3838 // For constant vectors, use generic constant pool lowering. Otherwise,
3839 // we'd have to materialize constants in GPRs just to move them into the
3840 // vector.
3841 return SDValue();
3842}
3843
3844static unsigned getPACKOpcode(unsigned DestBW,
3845 const RISCVSubtarget &Subtarget) {
3846 switch (DestBW) {
3847 default:
3848 llvm_unreachable("Unsupported pack size");
3849 case 16:
3850 return RISCV::PACKH;
3851 case 32:
3852 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3853 case 64:
3854 assert(Subtarget.is64Bit());
3855 return RISCV::PACK;
3856 }
3857}
3858
3859/// Double the element size of the build vector to reduce the number
3860/// of vslide1down in the build vector chain. In the worst case, this
3861/// trades three scalar operations for 1 vector operation. Scalar
3862/// operations are generally lower latency, and for out-of-order cores
3863/// we also benefit from additional parallelism.
3864 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3865 const RISCVSubtarget &Subtarget) {
3866 SDLoc DL(Op);
3867 MVT VT = Op.getSimpleValueType();
3868 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3869 MVT ElemVT = VT.getVectorElementType();
3870 if (!ElemVT.isInteger())
3871 return SDValue();
3872
3873 // TODO: Relax these architectural restrictions, possibly with costing
3874 // of the actual instructions required.
3875 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3876 return SDValue();
3877
3878 unsigned NumElts = VT.getVectorNumElements();
3879 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
3880 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
3881 NumElts % 2 != 0)
3882 return SDValue();
3883
3884 // Produce [B,A] packed into a type twice as wide. Note that all
3885 // scalars are XLenVT, possibly masked (see below).
3886 MVT XLenVT = Subtarget.getXLenVT();
3887 SDValue Mask = DAG.getConstant(
3888 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
3889 auto pack = [&](SDValue A, SDValue B) {
3890 // Bias the scheduling of the inserted operations to near the
3891 // definition of the element - this tends to reduce register
3892 // pressure overall.
3893 SDLoc ElemDL(B);
3894 if (Subtarget.hasStdExtZbkb())
3895 // Note that we're relying on the high bits of the result being
3896 // don't care. For PACKW, the result is *sign* extended.
3897 return SDValue(
3898 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
3899 ElemDL, XLenVT, A, B),
3900 0);
3901
3902 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
3903 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
3904 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
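// After the masking above, A and (B << ShtAmt) have no set bits in common, so
// the OR below is disjoint and later combines may treat it like an ADD.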
3905 SDNodeFlags Flags;
3906 Flags.setDisjoint(true);
3907 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
3908 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt), Flags);
3909 };
3910
3911 SmallVector<SDValue> NewOperands;
3912 NewOperands.reserve(NumElts / 2);
3913 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
3914 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
3915 assert(NumElts == NewOperands.size() * 2);
3916 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
3917 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
3918 return DAG.getNode(ISD::BITCAST, DL, VT,
3919 DAG.getBuildVector(WideVecVT, DL, NewOperands));
3920}
3921
3922 // Convert a vXf16 build_vector to a vXi16 build_vector with bitcasts.
3923 static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {
3924 MVT VT = Op.getSimpleValueType();
3925 MVT IVT = VT.changeVectorElementType(MVT::i16);
3926 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
3927 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
3928 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
3929 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);
3930 return DAG.getBitcast(VT, Res);
3931}
3932
3933 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3934 const RISCVSubtarget &Subtarget) {
3935 MVT VT = Op.getSimpleValueType();
3936 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3937
3938 // If we don't have scalar f16, we need to bitcast to an i16 vector.
3939 if (VT.getVectorElementType() == MVT::f16 &&
3940 !Subtarget.hasStdExtZfhmin())
3941 return lowerBUILD_VECTORvXf16(Op, DAG);
3942
3943 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3944 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3945 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3946
3947 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3948
3949 SDLoc DL(Op);
3950 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3951
3952 MVT XLenVT = Subtarget.getXLenVT();
3953
3954 if (VT.getVectorElementType() == MVT::i1) {
3955 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3956 // vector type, we have a legal equivalently-sized i8 type, so we can use
3957 // that.
3958 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3959 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3960
3961 SDValue WideVec;
3962 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3963 // For a splat, perform a scalar truncate before creating the wider
3964 // vector.
3965 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3966 DAG.getConstant(1, DL, Splat.getValueType()));
3967 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3968 } else {
3969 SmallVector<SDValue, 8> Ops(Op->op_values());
3970 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3971 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3972 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3973 }
3974
3975 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3976 }
3977
3978 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3979 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3980 return Gather;
3981 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3982 : RISCVISD::VMV_V_X_VL;
3983 if (!VT.isFloatingPoint())
3984 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3985 Splat =
3986 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3987 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3988 }
3989
3990 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3991 return Res;
3992
3993 // If we're compiling for an exact VLEN value, we can split our work per
3994 // register in the register group.
3995 if (const auto VLen = Subtarget.getRealVLen();
3996 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3997 MVT ElemVT = VT.getVectorElementType();
3998 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3999 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4000 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4001 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4002 assert(M1VT == getLMUL1VT(M1VT));
4003
4004 // The following semantically builds up a fixed length concat_vector
4005 // of the component build_vectors. We eagerly lower to scalable and
4006 // insert_subvector here to avoid DAG combining it back to a large
4007 // build_vector.
4008 SmallVector<SDValue> BuildVectorOps(Op->ops());
4009 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4010 SDValue Vec = DAG.getUNDEF(ContainerVT);
4011 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4012 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4013 SDValue SubBV =
4014 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4015 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4016 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4017 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4018 DAG.getVectorIdxConstant(InsertIdx, DL));
4019 }
4020 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4021 }
4022
4023 // If we're about to resort to vslide1down (or stack usage), pack our
4024 // elements into the widest scalar type we can. This will force a VL/VTYPE
4025 // toggle, but reduces the critical path, the number of vslide1down ops
4026 // required, and possibly enables scalar folds of the values.
4027 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4028 return Res;
4029
4030 // For m1 vectors, if we have non-undef values in both halves of our vector,
4031 // split the vector into low and high halves, build them separately, then
4032 // use a vselect to combine them. For long vectors, this cuts the critical
4033 // path of the vslide1down sequence in half, and gives us an opportunity
4034 // to special case each half independently. Note that we don't change the
4035 // length of the sub-vectors here, so if both fallback to the generic
4036 // vslide1down path, we should be able to fold the vselect into the final
4037 // vslidedown (for the undef tail) for the first half w/ masking.
4038 unsigned NumElts = VT.getVectorNumElements();
4039 unsigned NumUndefElts =
4040 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4041 unsigned NumDefElts = NumElts - NumUndefElts;
4042 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4043 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4044 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4045 SmallVector<SDValue> MaskVals;
4046 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4047 SubVecAOps.reserve(NumElts);
4048 SubVecBOps.reserve(NumElts);
4049 for (unsigned i = 0; i < NumElts; i++) {
4050 SDValue Elem = Op->getOperand(i);
4051 if (i < NumElts / 2) {
4052 SubVecAOps.push_back(Elem);
4053 SubVecBOps.push_back(UndefElem);
4054 } else {
4055 SubVecAOps.push_back(UndefElem);
4056 SubVecBOps.push_back(Elem);
4057 }
4058 bool SelectMaskVal = (i < NumElts / 2);
4059 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4060 }
4061 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4062 MaskVals.size() == NumElts);
4063
4064 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4065 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4066 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4067 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4068 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4069 }
4070
4071 // Cap the cost at a value linear to the number of elements in the vector.
4072 // The default lowering is to use the stack. The vector store + scalar loads
4073 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4074 // being (at least) linear in LMUL. As a result, using the vslidedown
4075 // lowering for every element ends up costing VL*LMUL.
4076 // TODO: Should we be directly costing the stack alternative? Doing so might
4077 // give us a more accurate upper bound.
4078 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4079
4080 // TODO: unify with TTI getSlideCost.
4081 InstructionCost PerSlideCost = 1;
4082 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4083 default: break;
4084 case RISCVII::VLMUL::LMUL_2:
4085 PerSlideCost = 2;
4086 break;
4087 case RISCVII::VLMUL::LMUL_4:
4088 PerSlideCost = 4;
4089 break;
4090 case RISCVII::VLMUL::LMUL_8:
4091 PerSlideCost = 8;
4092 break;
4093 }
4094
4095 // TODO: Should we be using the build instseq then cost + evaluate scheme
4096 // we use for integer constants here?
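// Accounting in the loop below: each run of undef elements collapses into a
// single vslidedown and is charged once, while every defined element costs
// one vslide1down.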
4097 unsigned UndefCount = 0;
4098 for (const SDValue &V : Op->ops()) {
4099 if (V.isUndef()) {
4100 UndefCount++;
4101 continue;
4102 }
4103 if (UndefCount) {
4104 LinearBudget -= PerSlideCost;
4105 UndefCount = 0;
4106 }
4107 LinearBudget -= PerSlideCost;
4108 }
4109 if (UndefCount) {
4110 LinearBudget -= PerSlideCost;
4111 }
4112
4113 if (LinearBudget < 0)
4114 return SDValue();
4115
4116 assert((!VT.isFloatingPoint() ||
4117 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4118 "Illegal type which will result in reserved encoding");
4119
4120 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4121
4122 SDValue Vec;
4123 UndefCount = 0;
4124 for (SDValue V : Op->ops()) {
4125 if (V.isUndef()) {
4126 UndefCount++;
4127 continue;
4128 }
4129
4130 // Start our sequence with a TA splat in the hopes that hardware is able to
4131 // recognize there's no dependency on the prior value of our temporary
4132 // register.
4133 if (!Vec) {
4134 Vec = DAG.getSplatVector(VT, DL, V);
4135 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4136 UndefCount = 0;
4137 continue;
4138 }
4139
4140 if (UndefCount) {
4141 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4142 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4143 Vec, Offset, Mask, VL, Policy);
4144 UndefCount = 0;
4145 }
4146 auto OpCode =
4147 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4148 if (!VT.isFloatingPoint())
4149 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4150 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4151 V, Mask, VL);
4152 }
4153 if (UndefCount) {
4154 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4155 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4156 Vec, Offset, Mask, VL, Policy);
4157 }
4158 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4159}
4160
4161static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4162 SDValue Lo, SDValue Hi, SDValue VL,
4163 SelectionDAG &DAG) {
4164 if (!Passthru)
4165 Passthru = DAG.getUNDEF(VT);
4166 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4167 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4168 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4169 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4170 // node in order to try and match RVV vector/scalar instructions.
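// For example, Lo = -5 gives (LoC >> 31) == -1, so Hi == -1 means the i64
// value is just Lo sign-extended and a single vmv.v.x of Lo suffices.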
4171 if ((LoC >> 31) == HiC)
4172 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4173
4174 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4175 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4176 // vlmax vsetvli or vsetivli to change the VL.
4177 // FIXME: Support larger constants?
4178 // FIXME: Support non-constant VLs by saturating?
4179 if (LoC == HiC) {
4180 SDValue NewVL;
4181 if (isAllOnesConstant(VL) ||
4182 (isa<RegisterSDNode>(VL) &&
4183 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4184 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4185 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4186 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
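// VL is doubled here because switching from EEW=64 to EEW=32 means twice as
// many elements are needed to cover the same number of bytes.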
4187
4188 if (NewVL) {
4189 MVT InterVT =
4190 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4191 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4192 DAG.getUNDEF(InterVT), Lo, NewVL);
4193 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4194 }
4195 }
4196 }
4197
4198 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4199 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4200 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4201 Hi.getConstantOperandVal(1) == 31)
4202 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4203
4204 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4205 // even if it might be sign extended.
4206 if (Hi.isUndef())
4207 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4208
4209 // Fall back to a stack store and stride x0 vector load.
4210 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4211 Hi, VL);
4212}
4213
4214// Called by type legalization to handle splat of i64 on RV32.
4215// FIXME: We can optimize this when the type has sign or zero bits in one
4216// of the halves.
4217static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4218 SDValue Scalar, SDValue VL,
4219 SelectionDAG &DAG) {
4220 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4221 SDValue Lo, Hi;
4222 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4223 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4224}
4225
4226// This function lowers a splat of a scalar operand Splat with the vector
4227// length VL. It ensures the final sequence is type legal, which is useful when
4228// lowering a splat after type legalization.
4229static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4230 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4231 const RISCVSubtarget &Subtarget) {
4232 bool HasPassthru = Passthru && !Passthru.isUndef();
4233 if (!HasPassthru && !Passthru)
4234 Passthru = DAG.getUNDEF(VT);
4235 if (VT.isFloatingPoint())
4236 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4237
4238 MVT XLenVT = Subtarget.getXLenVT();
4239
4240 // Simplest case is that the operand needs to be promoted to XLenVT.
4241 if (Scalar.getValueType().bitsLE(XLenVT)) {
4242 // If the operand is a constant, sign extend to increase our chances
4243 // of being able to use a .vi instruction. ANY_EXTEND would become a
4244 // zero extend and the simm5 check in isel would fail.
4245 // FIXME: Should we ignore the upper bits in isel instead?
4246 unsigned ExtOpc =
4247 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4248 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4249 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4250 }
4251
4252 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4253 "Unexpected scalar for splat lowering!");
4254
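// A zero scalar with VL=1 can be inserted directly with vmv.s.x of zero;
// there is no need to split the i64 into halves.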
4255 if (isOneConstant(VL) && isNullConstant(Scalar))
4256 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4257 DAG.getConstant(0, DL, XLenVT), VL);
4258
4259 // Otherwise use the more complicated splatting algorithm.
4260 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4261}
4262
4263// This function lowers an insert of a scalar operand Scalar into lane
4264// 0 of the vector regardless of the value of VL. The contents of the
4265// remaining lanes of the result vector are unspecified. VL is assumed
4266// to be non-zero.
4267 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4268 const SDLoc &DL, SelectionDAG &DAG,
4269 const RISCVSubtarget &Subtarget) {
4270 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4271
4272 const MVT XLenVT = Subtarget.getXLenVT();
4273 SDValue Passthru = DAG.getUNDEF(VT);
4274
4275 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4276 isNullConstant(Scalar.getOperand(1))) {
4277 SDValue ExtractedVal = Scalar.getOperand(0);
4278 // The element types must be the same.
4279 if (ExtractedVal.getValueType().getVectorElementType() ==
4280 VT.getVectorElementType()) {
4281 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4282 MVT ExtractedContainerVT = ExtractedVT;
4283 if (ExtractedContainerVT.isFixedLengthVector()) {
4284 ExtractedContainerVT = getContainerForFixedLengthVector(
4285 DAG, ExtractedContainerVT, Subtarget);
4286 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4287 ExtractedVal, DAG, Subtarget);
4288 }
4289 if (ExtractedContainerVT.bitsLE(VT))
4290 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4291 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4292 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4293 DAG.getVectorIdxConstant(0, DL));
4294 }
4295 }
4296
4297
4298 if (VT.isFloatingPoint())
4299 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4300 DAG.getUNDEF(VT), Scalar, VL);
4301
4302 // Avoid the tricky legalization cases by falling back to using the
4303 // splat code which already handles it gracefully.
4304 if (!Scalar.getValueType().bitsLE(XLenVT))
4305 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4306 DAG.getConstant(1, DL, XLenVT),
4307 VT, DL, DAG, Subtarget);
4308
4309 // If the operand is a constant, sign extend to increase our chances
4310 // of being able to use a .vi instruction. ANY_EXTEND would become a
4311 // zero extend and the simm5 check in isel would fail.
4312 // FIXME: Should we ignore the upper bits in isel instead?
4313 unsigned ExtOpc =
4314 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4315 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4316 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4317 DAG.getUNDEF(VT), Scalar, VL);
4318}
4319
4320 // Is this a shuffle that extracts either the even or odd elements of a vector?
4321// That is, specifically, either (a) or (b) below.
4322// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4323// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4324// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4325// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4326 // Returns {Src Vector, Even Elements} on success.
4327static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4328 SDValue V2, ArrayRef<int> Mask,
4329 const RISCVSubtarget &Subtarget) {
4330 // Need to be able to widen the vector.
4331 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4332 return false;
4333
4334 // Both input must be extracts.
4335 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4336 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4337 return false;
4338
4339 // Extracting from the same source.
4340 SDValue Src = V1.getOperand(0);
4341 if (Src != V2.getOperand(0))
4342 return false;
4343
4344 // Src needs to have twice the number of elements.
4345 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4346 return false;
4347
4348 // The extracts must extract the two halves of the source.
4349 if (V1.getConstantOperandVal(1) != 0 ||
4350 V2.getConstantOperandVal(1) != Mask.size())
4351 return false;
4352
4353 // First index must be the first even or odd element from V1.
4354 if (Mask[0] != 0 && Mask[0] != 1)
4355 return false;
4356
4357 // The others must increase by 2 each time.
4358 // TODO: Support undef elements?
4359 for (unsigned i = 1; i != Mask.size(); ++i)
4360 if (Mask[i] != Mask[i - 1] + 2)
4361 return false;
4362
4363 return true;
4364}
4365
4366/// Is this shuffle interleaving contiguous elements from one vector into the
4367/// even elements and contiguous elements from another vector into the odd
4368/// elements. \p EvenSrc will contain the element that should be in the first
4369/// even element. \p OddSrc will contain the element that should be in the first
4370/// odd element. These can be the first element in a source or the element half
4371/// way through the source.
4372static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4373 int &OddSrc, const RISCVSubtarget &Subtarget) {
4374 // We need to be able to widen elements to the next larger integer type.
4375 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4376 return false;
4377
4378 int Size = Mask.size();
4379 int NumElts = VT.getVectorNumElements();
4380 assert(Size == (int)NumElts && "Unexpected mask size");
4381
4382 SmallVector<unsigned, 2> StartIndexes;
4383 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4384 return false;
4385
4386 EvenSrc = StartIndexes[0];
4387 OddSrc = StartIndexes[1];
4388
4389 // One source should be low half of first vector.
4390 if (EvenSrc != 0 && OddSrc != 0)
4391 return false;
4392
4393 // Subvectors will be extracted from either the start of the two input
4394 // vectors, or the start and middle of the first vector if it's a unary
4395 // interleave.
4396 // In both cases, HalfNumElts will be extracted.
4397 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4398 // we'll create an illegal extract_subvector.
4399 // FIXME: We could support other values using a slidedown first.
4400 int HalfNumElts = NumElts / 2;
4401 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4402}
4403
4404/// Match shuffles that concatenate two vectors, rotate the concatenation,
4405/// and then extract the original number of elements from the rotated result.
4406/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4407/// returned rotation amount is for a rotate right, where elements move from
4408/// higher elements to lower elements. \p LoSrc indicates the first source
4409/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4410/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4411/// 0 or 1 if a rotation is found.
4412///
4413/// NOTE: We talk about rotate to the right which matches how bit shift and
4414/// rotate instructions are described where LSBs are on the right, but LLVM IR
4415/// and the table below write vectors with the lowest elements on the left.
4416static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4417 int Size = Mask.size();
4418
4419 // We need to detect various ways of spelling a rotation:
4420 // [11, 12, 13, 14, 15, 0, 1, 2]
4421 // [-1, 12, 13, 14, -1, -1, 1, -1]
4422 // [-1, -1, -1, -1, -1, -1, 1, 2]
4423 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4424 // [-1, 4, 5, 6, -1, -1, 9, -1]
4425 // [-1, 4, 5, 6, -1, -1, -1, -1]
4426 int Rotation = 0;
4427 LoSrc = -1;
4428 HiSrc = -1;
4429 for (int i = 0; i != Size; ++i) {
4430 int M = Mask[i];
4431 if (M < 0)
4432 continue;
4433
4434 // Determine where a rotate vector would have started.
4435 int StartIdx = i - (M % Size);
4436 // The identity rotation isn't interesting, stop.
4437 if (StartIdx == 0)
4438 return -1;
4439
4440 // If we found the tail of a vector the rotation must be the missing
4441 // front. If we found the head of a vector, it must be how much of the
4442 // head.
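// For example, with Size == 8 the mask [11,12,13,14,15,0,1,2] yields
// CandidateRotation == 3 at every defined index, with HiSrc = 1 and LoSrc = 0.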
4443 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4444
4445 if (Rotation == 0)
4446 Rotation = CandidateRotation;
4447 else if (Rotation != CandidateRotation)
4448 // The rotations don't match, so we can't match this mask.
4449 return -1;
4450
4451 // Compute which value this mask is pointing at.
4452 int MaskSrc = M < Size ? 0 : 1;
4453
4454 // Compute which of the two target values this index should be assigned to.
4455 // This reflects whether the high elements are remaining or the low elements
4456 // are remaining.
4457 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4458
4459 // Either set up this value if we've not encountered it before, or check
4460 // that it remains consistent.
4461 if (TargetSrc < 0)
4462 TargetSrc = MaskSrc;
4463 else if (TargetSrc != MaskSrc)
4464 // This may be a rotation, but it pulls from the inputs in some
4465 // unsupported interleaving.
4466 return -1;
4467 }
4468
4469 // Check that we successfully analyzed the mask, and normalize the results.
4470 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4471 assert((LoSrc >= 0 || HiSrc >= 0) &&
4472 "Failed to find a rotated input vector!");
4473
4474 return Rotation;
4475}
4476
4477// Lower a deinterleave shuffle to vnsrl.
4478// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4479// -> [p, q, r, s] (EvenElts == false)
4480// VT is the type of the vector to return, <[vscale x ]n x ty>
4481// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4482 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4483 bool EvenElts,
4484 const RISCVSubtarget &Subtarget,
4485 SelectionDAG &DAG) {
4486 // The result is a vector of type <m x n x ty>
4487 MVT ContainerVT = VT;
4488 // Convert fixed vectors to scalable if needed
4489 if (ContainerVT.isFixedLengthVector()) {
4490 assert(Src.getSimpleValueType().isFixedLengthVector());
4491 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4492
4493 // The source is a vector of type <m x n*2 x ty>
4494 MVT SrcContainerVT =
4495 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4496 ContainerVT.getVectorElementCount() * 2);
4497 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4498 }
4499
4500 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4501
4502 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4503 // This also converts FP to int.
4504 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4505 MVT WideSrcContainerVT = MVT::getVectorVT(
4506 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4507 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4508
4509 // The integer version of the container type.
4510 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4511
4512 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4513 // the original element size.
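// Viewing <a,p,b,q> as two double-width elements, a narrowing shift by 0 keeps
// the low halves (a,b: the even elements) and a shift by EltBits keeps the
// high halves (p,q: the odd elements).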
4514 unsigned Shift = EvenElts ? 0 : EltBits;
4515 SDValue SplatShift = DAG.getNode(
4516 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4517 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4518 SDValue Res =
4519 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4520 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4521 // Cast back to FP if needed.
4522 Res = DAG.getBitcast(ContainerVT, Res);
4523
4524 if (VT.isFixedLengthVector())
4525 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4526 return Res;
4527}
4528
4529// Lower the following shuffle to vslidedown.
4530// a)
4531// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4532// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4533// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4534// b)
4535// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4536// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4537// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4538// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4539// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4540// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4541 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4542 SDValue V1, SDValue V2,
4543 ArrayRef<int> Mask,
4544 const RISCVSubtarget &Subtarget,
4545 SelectionDAG &DAG) {
4546 auto findNonEXTRACT_SUBVECTORParent =
4547 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4548 uint64_t Offset = 0;
4549 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4550 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4551 // a scalable vector, but we don't want to match that case.
4552 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4553 Offset += Parent.getConstantOperandVal(1);
4554 Parent = Parent.getOperand(0);
4555 }
4556 return std::make_pair(Parent, Offset);
4557 };
4558
4559 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4560 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4561
4562 // Extracting from the same source.
4563 SDValue Src = V1Src;
4564 if (Src != V2Src)
4565 return SDValue();
4566
4567 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4568 SmallVector<int, 16> NewMask(Mask);
4569 for (size_t i = 0; i != NewMask.size(); ++i) {
4570 if (NewMask[i] == -1)
4571 continue;
4572
4573 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4574 NewMask[i] = NewMask[i] + V1IndexOffset;
4575 } else {
4576 // Minus NewMask.size() is needed. Otherwise, the b case would be
4577 // <5,6,7,12> instead of <5,6,7,8>.
4578 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4579 }
4580 }
4581
4582 // First index must be known and non-zero. It will be used as the slidedown
4583 // amount.
4584 if (NewMask[0] <= 0)
4585 return SDValue();
4586
4587 // The rebuilt NewMask must also be a run of consecutive indices.
4588 for (unsigned i = 1; i != NewMask.size(); ++i)
4589 if (NewMask[i - 1] + 1 != NewMask[i])
4590 return SDValue();
4591
4592 MVT XLenVT = Subtarget.getXLenVT();
4593 MVT SrcVT = Src.getSimpleValueType();
4594 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4595 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4596 SDValue Slidedown =
4597 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4598 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4599 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4600 return DAG.getNode(
4601 ISD::EXTRACT_SUBVECTOR, DL, VT,
4602 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4603 DAG.getConstant(0, DL, XLenVT));
4604}
4605
4606// Because vslideup leaves the destination elements at the start intact, we can
4607// use it to perform shuffles that insert subvectors:
4608//
4609// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4610// ->
4611// vsetvli zero, 8, e8, mf2, ta, ma
4612// vslideup.vi v8, v9, 4
4613//
4614// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4615// ->
4616// vsetvli zero, 5, e8, mf2, tu, ma
4617 // vslideup.vi v8, v9, 2
4618 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4619 SDValue V1, SDValue V2,
4620 ArrayRef<int> Mask,
4621 const RISCVSubtarget &Subtarget,
4622 SelectionDAG &DAG) {
4623 unsigned NumElts = VT.getVectorNumElements();
4624 int NumSubElts, Index;
4625 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4626 Index))
4627 return SDValue();
4628
4629 bool OpsSwapped = Mask[Index] < (int)NumElts;
4630 SDValue InPlace = OpsSwapped ? V2 : V1;
4631 SDValue ToInsert = OpsSwapped ? V1 : V2;
4632
4633 MVT XLenVT = Subtarget.getXLenVT();
4634 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4635 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4636 // We slide up by the index that the subvector is being inserted at, and set
4637 // VL to the index + the number of elements being inserted.
4638 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4639 // If we're adding a suffix to the in-place vector, i.e. inserting right
4640 // up to the very end of it, then we don't actually care about the tail.
4641 if (NumSubElts + Index >= (int)NumElts)
4642 Policy |= RISCVII::TAIL_AGNOSTIC;
4643
4644 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4645 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4646 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4647
4648 SDValue Res;
4649 // If we're inserting into the lowest elements, use a tail undisturbed
4650 // vmv.v.v.
4651 if (Index == 0)
4652 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4653 VL);
4654 else
4655 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4656 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4657 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4658}
4659
4660/// Match v(f)slide1up/down idioms. These operations involve sliding
4661/// N-1 elements to make room for an inserted scalar at one end.
4662 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4663 SDValue V1, SDValue V2,
4664 ArrayRef<int> Mask,
4665 const RISCVSubtarget &Subtarget,
4666 SelectionDAG &DAG) {
4667 bool OpsSwapped = false;
4668 if (!isa<BuildVectorSDNode>(V1)) {
4669 if (!isa<BuildVectorSDNode>(V2))
4670 return SDValue();
4671 std::swap(V1, V2);
4672 OpsSwapped = true;
4673 }
4674 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4675 if (!Splat)
4676 return SDValue();
4677
4678 // Return true if the mask could describe a slide of Mask.size() - 1
4679 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4680 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4681 const unsigned S = (Offset > 0) ? 0 : -Offset;
4682 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4683 for (unsigned i = S; i != E; ++i)
4684 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4685 return false;
4686 return true;
4687 };
4688
4689 const unsigned NumElts = VT.getVectorNumElements();
4690 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4691 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4692 return SDValue();
4693
4694 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4695 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4696 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4697 return SDValue();
4698
4699 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4700 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4701 auto OpCode = IsVSlidedown ?
4702 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4703 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4704 if (!VT.isFloatingPoint())
4705 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4706 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4707 DAG.getUNDEF(ContainerVT),
4708 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4709 Splat, TrueMask, VL);
4710 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4711}
4712
4713// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4714// to create an interleaved vector of <[vscale x] n*2 x ty>.
4715// This requires that the size of ty is less than the subtarget's maximum ELEN.
4716 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4717 const SDLoc &DL, SelectionDAG &DAG,
4718 const RISCVSubtarget &Subtarget) {
4719 MVT VecVT = EvenV.getSimpleValueType();
4720 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4721 // Convert fixed vectors to scalable if needed
4722 if (VecContainerVT.isFixedLengthVector()) {
4723 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4724 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4725 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4726 }
4727
4728 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4729
4730 // We're working with a vector of the same size as the resulting
4731 // interleaved vector, but with half the number of elements and
4732 // twice the SEW (Hence the restriction on not using the maximum
4733 // ELEN)
4734 MVT WideVT =
4735 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4736 VecVT.getVectorElementCount());
4737 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4738 if (WideContainerVT.isFixedLengthVector())
4739 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4740
4741 // Bitcast the input vectors to integers in case they are FP
4742 VecContainerVT = VecContainerVT.changeTypeToInteger();
4743 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4744 OddV = DAG.getBitcast(VecContainerVT, OddV);
4745
4746 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4747 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4748
4749 SDValue Interleaved;
4750 if (OddV.isUndef()) {
4751 // If OddV is undef, this is a zero extend.
4752 // FIXME: Not only does this optimize the code, it fixes some correctness
4753 // issues because MIR does not have freeze.
4754 Interleaved =
4755 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4756 } else if (Subtarget.hasStdExtZvbb()) {
4757 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4758 SDValue OffsetVec =
4759 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4760 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4761 OffsetVec, Passthru, Mask, VL);
4762 if (!EvenV.isUndef())
4763 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4764 Interleaved, EvenV, Passthru, Mask, VL);
4765 } else if (EvenV.isUndef()) {
4766 Interleaved =
4767 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4768
4769 SDValue OffsetVec =
4770 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4771 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4772 Interleaved, OffsetVec, Passthru, Mask, VL);
4773 } else {
4774 // FIXME: We should freeze the odd vector here. We already handled the case
4775 // of provably undef/poison above.
4776
4777 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4778 // vwaddu.vv
4779 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4780 OddV, Passthru, Mask, VL);
4781
4782 // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
4783 SDValue AllOnesVec = DAG.getSplatVector(
4784 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4785 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4786 OddV, AllOnesVec, Passthru, Mask, VL);
4787
4788 // Add the two together so we get
4789 // (OddV * 0xff...ff) + (OddV + EvenV)
4790 // = (OddV * 0x100...00) + EvenV
4791 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4792 // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
4793 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4794 Interleaved, OddsMul, Passthru, Mask, VL);
4795 }
4796
4797 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4798 MVT ResultContainerVT = MVT::getVectorVT(
4799 VecVT.getVectorElementType(), // Make sure to use original type
4800 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4801 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4802
4803 // Convert back to a fixed vector if needed
4804 MVT ResultVT =
4805 MVT::getVectorVT(VecVT.getVectorElementType(),
4806 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4807 if (ResultVT.isFixedLengthVector())
4808 Interleaved =
4809 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4810
4811 return Interleaved;
4812}
4813
4814// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4815// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4816 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4817 SelectionDAG &DAG,
4818 const RISCVSubtarget &Subtarget) {
4819 SDLoc DL(SVN);
4820 MVT VT = SVN->getSimpleValueType(0);
4821 SDValue V = SVN->getOperand(0);
4822 unsigned NumElts = VT.getVectorNumElements();
4823
4824 assert(VT.getVectorElementType() == MVT::i1);
4825
4827 SVN->getMask().size()) ||
4828 !SVN->getOperand(1).isUndef())
4829 return SDValue();
4830
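// Round the element count up to a power of two and to at least 8 bits (the
// smallest legal element type); any padding gap is shifted out below.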
4831 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4832 EVT ViaVT = EVT::getVectorVT(
4833 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4834 EVT ViaBitVT =
4835 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4836
4837 // If we don't have zvbb or the larger element type > ELEN, the operation will
4838 // be illegal.
4839 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4840 ViaVT) ||
4841 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4842 return SDValue();
4843
4844 // If the bit vector doesn't fit exactly into the larger element type, we need
4845 // to insert it into the larger vector and then shift up the reversed bits
4846 // afterwards to get rid of the gap introduced.
4847 if (ViaEltSize > NumElts)
4848 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4849 V, DAG.getVectorIdxConstant(0, DL));
4850
4851 SDValue Res =
4852 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4853
4854 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4855 // element type.
4856 if (ViaEltSize > NumElts)
4857 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4858 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4859
4860 Res = DAG.getBitcast(ViaBitVT, Res);
4861
4862 if (ViaEltSize > NumElts)
4863 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4864 DAG.getVectorIdxConstant(0, DL));
4865 return Res;
4866}
4867
4868 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4869 SelectionDAG &DAG,
4870 const RISCVSubtarget &Subtarget,
4871 MVT &RotateVT, unsigned &RotateAmt) {
4872 SDLoc DL(SVN);
4873
4874 EVT VT = SVN->getValueType(0);
4875 unsigned NumElts = VT.getVectorNumElements();
4876 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4877 unsigned NumSubElts;
4878 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4879 NumElts, NumSubElts, RotateAmt))
4880 return false;
4881 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4882 NumElts / NumSubElts);
4883
4884 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4885 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4886}
4887
4888// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4889// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4890// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4891 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4892 SelectionDAG &DAG,
4893 const RISCVSubtarget &Subtarget) {
4894 SDLoc DL(SVN);
4895
4896 EVT VT = SVN->getValueType(0);
4897 unsigned RotateAmt;
4898 MVT RotateVT;
4899 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4900 return SDValue();
4901
4902 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4903
4904 SDValue Rotate;
4905 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4906 // so canonicalize to vrev8.
4907 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4908 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4909 else
4910 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4911 DAG.getConstant(RotateAmt, DL, RotateVT));
4912
4913 return DAG.getBitcast(VT, Rotate);
4914}
4915
4916// If compiling with an exactly known VLEN, see if we can split a
4917// shuffle on m2 or larger into a small number of m1 sized shuffles
4918 // which write each destination register exactly once.
4919 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4920 SelectionDAG &DAG,
4921 const RISCVSubtarget &Subtarget) {
4922 SDLoc DL(SVN);
4923 MVT VT = SVN->getSimpleValueType(0);
4924 SDValue V1 = SVN->getOperand(0);
4925 SDValue V2 = SVN->getOperand(1);
4926 ArrayRef<int> Mask = SVN->getMask();
4927 unsigned NumElts = VT.getVectorNumElements();
4928
4929 // If we don't know exact data layout, not much we can do. If this
4930 // is already m1 or smaller, no point in splitting further.
4931 const auto VLen = Subtarget.getRealVLen();
4932 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4933 return SDValue();
4934
4935 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4936 // expansion for.
4937 unsigned RotateAmt;
4938 MVT RotateVT;
4939 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4940 return SDValue();
4941
4942 MVT ElemVT = VT.getVectorElementType();
4943 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4944 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4945
4947 OutMasks(VRegsPerSrc, {-1, {}});
4948
4949 // Check if our mask can be done as a 1-to-1 mapping from source
4950 // to destination registers in the group without needing to
4951 // write each destination more than once.
4952 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4953 int DstVecIdx = DstIdx / ElemsPerVReg;
4954 int DstSubIdx = DstIdx % ElemsPerVReg;
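// e.g. with ElemsPerVReg == 4, destination element 9 lives in destination
// vreg 2 at sub-index 1.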
4955 int SrcIdx = Mask[DstIdx];
4956 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4957 continue;
4958 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4959 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4960 if (OutMasks[DstVecIdx].first == -1)
4961 OutMasks[DstVecIdx].first = SrcVecIdx;
4962 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4963 // Note: This case could easily be handled by keeping track of a chain
4964 // of source values and generating two element shuffles below. This is
4965 // less an implementation question, and more a profitability one.
4966 return SDValue();
4967
4968 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4969 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4970 }
4971
4972 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4973 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4974 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4975 assert(M1VT == getLMUL1VT(M1VT));
4976 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4977 SDValue Vec = DAG.getUNDEF(ContainerVT);
4978 // The following semantically builds up a fixed length concat_vector
4979 // of the component shuffle_vectors. We eagerly lower to scalable here
4980 // to avoid DAG combining it back to a large shuffle_vector again.
4981 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4982 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4983 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4984 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4985 if (SrcVecIdx == -1)
4986 continue;
4987 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4988 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4989 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4990 DAG.getVectorIdxConstant(ExtractIdx, DL));
4991 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4992 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4993 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4994 unsigned InsertIdx = DstVecIdx * NumOpElts;
4995 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4996 DAG.getVectorIdxConstant(InsertIdx, DL));
4997 }
4998 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4999}
5000
5001 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5002 const RISCVSubtarget &Subtarget) {
5003 SDValue V1 = Op.getOperand(0);
5004 SDValue V2 = Op.getOperand(1);
5005 SDLoc DL(Op);
5006 MVT XLenVT = Subtarget.getXLenVT();
5007 MVT VT = Op.getSimpleValueType();
5008 unsigned NumElts = VT.getVectorNumElements();
5009 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5010
5011 if (VT.getVectorElementType() == MVT::i1) {
5012 // Lower to a vror.vi of a larger element type if possible before we promote
5013 // i1s to i8s.
5014 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5015 return V;
5016 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5017 return V;
5018
5019 // Promote i1 shuffle to i8 shuffle.
5020 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5021 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5022 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5023 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5024 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5025 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5026 ISD::SETNE);
5027 }
5028
5029 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5030
5031 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5032
5033 if (SVN->isSplat()) {
5034 const int Lane = SVN->getSplatIndex();
5035 if (Lane >= 0) {
5036 MVT SVT = VT.getVectorElementType();
5037
5038 // Turn splatted vector load into a strided load with an X0 stride.
5039 SDValue V = V1;
5040 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5041 // with undef.
5042 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5043 int Offset = Lane;
5044 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5045 int OpElements =
5046 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5047 V = V.getOperand(Offset / OpElements);
5048 Offset %= OpElements;
5049 }
5050
5051 // We need to ensure the load isn't atomic or volatile.
5052 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5053 auto *Ld = cast<LoadSDNode>(V);
5054 Offset *= SVT.getStoreSize();
5055 SDValue NewAddr = DAG.getMemBasePlusOffset(
5056 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5057
5058 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5059 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5060 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5061 SDValue IntID =
5062 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5063 SDValue Ops[] = {Ld->getChain(),
5064 IntID,
5065 DAG.getUNDEF(ContainerVT),
5066 NewAddr,
5067 DAG.getRegister(RISCV::X0, XLenVT),
5068 VL};
5069 SDValue NewLoad = DAG.getMemIntrinsicNode(
5070 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5071 DAG.getMachineFunction().getMachineMemOperand(
5072 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5073 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5074 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5075 }
5076
5077 MVT SplatVT = ContainerVT;
5078
5079 // If we don't have Zfh, we need to use an integer scalar load.
5080 if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
5081 SVT = MVT::i16;
5082 SplatVT = ContainerVT.changeVectorElementType(SVT);
5083 }
5084
5085 // Otherwise use a scalar load and splat. This will give the best
5086 // opportunity to fold a splat into the operation. ISel can turn it into
5087 // the x0 strided load if we aren't able to fold away the select.
5088 if (SVT.isFloatingPoint())
5089 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5090 Ld->getPointerInfo().getWithOffset(Offset),
5091 Ld->getOriginalAlign(),
5092 Ld->getMemOperand()->getFlags());
5093 else
5094 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5095 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5096 Ld->getOriginalAlign(),
5097 Ld->getMemOperand()->getFlags());
5099
5100 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5101 : RISCVISD::VMV_V_X_VL;
5102 SDValue Splat =
5103 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5104 Splat = DAG.getBitcast(ContainerVT, Splat);
5105 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5106 }
5107
5108 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5109 assert(Lane < (int)NumElts && "Unexpected lane!");
5110 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5111 V1, DAG.getConstant(Lane, DL, XLenVT),
5112 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5113 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5114 }
5115 }
5116
5117 // For exact VLEN m2 or greater, try to split to m1 operations if we
5118 // can split cleanly.
5119 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5120 return V;
5121
5122 ArrayRef<int> Mask = SVN->getMask();
5123
5124 if (SDValue V =
5125 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5126 return V;
5127
5128 if (SDValue V =
5129 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5130 return V;
5131
5132 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5133 // available.
5134 if (Subtarget.hasStdExtZvkb())
5135 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5136 return V;
5137
5138 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5139 // be undef which can be handled with a single SLIDEDOWN/UP.
5140 int LoSrc, HiSrc;
5141 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5142 if (Rotation > 0) {
5143 SDValue LoV, HiV;
5144 if (LoSrc >= 0) {
5145 LoV = LoSrc == 0 ? V1 : V2;
5146 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5147 }
5148 if (HiSrc >= 0) {
5149 HiV = HiSrc == 0 ? V1 : V2;
5150 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5151 }
5152
5153 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5154 // to slide LoV up by (NumElts - Rotation).
5155 unsigned InvRotate = NumElts - Rotation;
5156
5157 SDValue Res = DAG.getUNDEF(ContainerVT);
5158 if (HiV) {
5159 // Even though we could use a smaller VL, don't to avoid a vsetivli
5160 // toggle.
5161 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5162 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5163 }
5164 if (LoV)
5165 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5166 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5167 RISCVII::TAIL_AGNOSTIC);
5168
5169 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5170 }
5171
5172 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5173 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5174
5175 // If this is a deinterleave and we can widen the vector, then we can use
5176 // vnsrl to deinterleave.
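// Illustrative example: to extract the even elements of a v8i8 shuffle
// (mask <0,2,4,6>), the source can be reinterpreted as v4i16 and narrowed
// with vnsrl by 0; shifting by 8 instead yields the odd elements. This is
// roughly what getDeinterleaveViaVNSRL emits.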
5177 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5178 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5179 Subtarget, DAG);
5180 }
5181
5182 if (SDValue V =
5183 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5184 return V;
5185
5186 // Detect an interleave shuffle and lower to
5187 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
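// Illustrative example: with NumElts = 8, the mask <0,8,1,9,2,10,3,11>
// interleaves the low halves of V1 and V2. Viewing each result pair as one
// element of twice the width, its value is even + odd * 2^eltbits, which is
// computed as (even + odd) + odd * (2^eltbits - 1), i.e. a widening add
// followed by a widening multiply-accumulate.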
5188 int EvenSrc, OddSrc;
5189 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5190 // Extract the halves of the vectors.
5191 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5192
5193 int Size = Mask.size();
5194 SDValue EvenV, OddV;
5195 assert(EvenSrc >= 0 && "Undef source?");
5196 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5197 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5198 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5199
5200 assert(OddSrc >= 0 && "Undef source?");
5201 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5202 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5203 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5204
5205 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5206 }
5207
5208
5209 // Handle any remaining single source shuffles
5210 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5211 if (V2.isUndef()) {
5212 // We might be able to express the shuffle as a bitrotate. But even if we
5213 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5214 // shifts and a vor will have a higher throughput than a vrgather.
5215 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5216 return V;
5217
5218 if (VT.getScalarSizeInBits() == 8 &&
5219 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5220 // On such a vector we're unable to use i8 as the index type.
5221 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5222 // may involve vector splitting if we're already at LMUL=8, or our
5223 // user-supplied maximum fixed-length LMUL.
5224 return SDValue();
5225 }
5226
5227 // Base case for the two operand recursion below - handle the worst case
5228 // single source shuffle.
5229 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5230 MVT IndexVT = VT.changeTypeToInteger();
5231 // Since we can't introduce illegal index types at this stage, use i16 and
5232 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5233 // than XLenVT.
5234 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5235 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5236 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5237 }
5238
5239 // If the mask allows, we can do all the index computation in 16 bits. This
5240 // requires less work and less register pressure at high LMUL, and creates
5241 // smaller constants which may be cheaper to materialize.
5242 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5243 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5244 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5245 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5246 }
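// Illustrative example: for a 64-element i32 shuffle on a VLEN=128 machine,
// i32 gather indices occupy twice as many vector registers as i16 indices,
// and 63 fits comfortably in 16 bits, so vrgatherei16 with an i16 index
// vector is preferred here.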
5247
5248 MVT IndexContainerVT =
5249 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5250
5251 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5252 SmallVector<SDValue> GatherIndicesLHS;
5253 for (int MaskIndex : Mask) {
5254 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5255 GatherIndicesLHS.push_back(IsLHSIndex
5256 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5257 : DAG.getUNDEF(XLenVT));
5258 }
5259 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5260 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5261 Subtarget);
5262 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5263 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5264 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5265 }
5266
5267 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5268 // merged with a second vrgather.
5269 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5270
5271 // Now construct the mask that will be used by the blended vrgather operation.
5272 // Construct the appropriate indices into each vector.
5273 for (int MaskIndex : Mask) {
5274 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5275 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5276 ? MaskIndex : -1);
5277 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5278 }
5279
5280 // Try to pick a profitable operand order.
5281 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5282 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5283
5284 // Recursively invoke lowering for each operand if we had two
5285 // independent single source shuffles, and then combine the result via a
5286 // vselect. Note that the vselect will likely be folded back into the
5287 // second permute (vrgather, or other) by the post-isel combine.
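// Illustrative example: with NumElts = 4, the mask <0,5,1,7> is split into
// ShuffleMaskLHS = <0,-1,1,-1> and ShuffleMaskRHS = <-1,1,-1,3>; each side is
// lowered as a single-source shuffle and the results are blended with a
// vselect that picks V1's result at positions 0 and 2 and V2's at 1 and 3.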
5288 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5289 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5290
5291 SmallVector<SDValue> MaskVals;
5292 for (int MaskIndex : Mask) {
5293 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5294 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5295 }
5296
5297 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5298 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5299 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5300
5301 if (SwapOps)
5302 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5303 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5304}
5305
5306 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5307 // Support splats for any type. These should type legalize well.
5308 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5309 return true;
5310
5311 // Only support legal VTs for other shuffles for now.
5312 if (!isTypeLegal(VT))
5313 return false;
5314
5315 MVT SVT = VT.getSimpleVT();
5316
5317 // Not for i1 vectors.
5318 if (SVT.getScalarType() == MVT::i1)
5319 return false;
5320
5321 int Dummy1, Dummy2;
5322 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5323 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5324}
5325
5326// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5327// the exponent.
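// Scalar analogue of the trick used below (illustrative): for a nonzero
// value x converted to floating point with round-towards-zero, the biased
// exponent field e satisfies e - Bias == floor(log2(x)), so
//   cttz(x) == exponent_of(x & -x) - Bias
//   ctlz(x) == (Bias + EltSize - 1) - exponent_of(x)
// where Bias is 127 for f32 and 1023 for f64.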
5328SDValue
5329RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5330 SelectionDAG &DAG) const {
5331 MVT VT = Op.getSimpleValueType();
5332 unsigned EltSize = VT.getScalarSizeInBits();
5333 SDValue Src = Op.getOperand(0);
5334 SDLoc DL(Op);
5335 MVT ContainerVT = VT;
5336
5337 SDValue Mask, VL;
5338 if (Op->isVPOpcode()) {
5339 Mask = Op.getOperand(1);
5340 if (VT.isFixedLengthVector())
5341 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5342 Subtarget);
5343 VL = Op.getOperand(2);
5344 }
5345
5346 // We choose an FP type that can represent the value exactly if possible.
5347 // Otherwise, we use a round-towards-zero conversion so the result's exponent is still correct.
5348 // TODO: Use f16 for i8 when possible?
5349 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5350 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5351 FloatEltVT = MVT::f32;
5352 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5353
5354 // Legal types should have been checked in the RISCVTargetLowering
5355 // constructor.
5356 // TODO: Splitting may make sense in some cases.
5357 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5358 "Expected legal float type!");
5359
5360 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5361 // The trailing zero count is equal to log2 of this single bit value.
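// Worked example (illustrative): x = 0b0110'1000 has cttz(x) = 3;
// x & -x = 0b0000'1000 = 2^3, and the float exponent field of 8.0 is
// 127 + 3, so subtracting the bias below recovers the trailing zero count.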
5362 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5363 SDValue Neg = DAG.getNegative(Src, DL, VT);
5364 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5365 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5366 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5367 Src, Mask, VL);
5368 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5369 }
5370
5371 // We have a legal FP type, convert to it.
5372 SDValue FloatVal;
5373 if (FloatVT.bitsGT(VT)) {
5374 if (Op->isVPOpcode())
5375 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5376 else
5377 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5378 } else {
5379 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5380 if (VT.isFixedLengthVector()) {
5381 ContainerVT = getContainerForFixedLengthVector(VT);
5382 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5383 }
5384 if (!Op->isVPOpcode())
5385 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5386 SDValue RTZRM =
5387 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5388 MVT ContainerFloatVT =
5389 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5390 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5391 Src, Mask, RTZRM, VL);
5392 if (VT.isFixedLengthVector())
5393 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5394 }
5395 // Bitcast to integer and shift the exponent to the LSB.
5396 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5397 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5398 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5399
5400 SDValue Exp;
5401 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5402 if (Op->isVPOpcode()) {
5403 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5404 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5405 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5406 } else {
5407 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5408 DAG.getConstant(ShiftAmt, DL, IntVT));
5409 if (IntVT.bitsLT(VT))
5410 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5411 else if (IntVT.bitsGT(VT))
5412 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5413 }
5414
5415 // The exponent contains log2 of the value in biased form.
5416 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5417 // For trailing zeros, we just need to subtract the bias.
5418 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5419 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5420 DAG.getConstant(ExponentBias, DL, VT));
5421 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5422 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5423 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5424
5425 // For leading zeros, we need to remove the bias and convert from log2 to
5426 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
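// Worked example (illustrative): for a 16-bit x = 1 converted to f32, the
// biased exponent is 127, so Res = (127 + 15) - 127 = 15 = ctlz(1) on i16.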
5427 unsigned Adjust = ExponentBias + (EltSize - 1);
5428 SDValue Res;
5429 if (Op->isVPOpcode())
5430 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5431 Mask, VL);
5432 else
5433 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5434
5435 // With a zero input, the result above equals Adjust, which is greater than
5436 // EltSize. Hence, we can clamp the result with min(Res, EltSize) for CTLZ.
5437 if (Op.getOpcode() == ISD::CTLZ)
5438 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5439 else if (Op.getOpcode() == ISD::VP_CTLZ)
5440 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5441 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5442 return Res;
5443}
5444
5445SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5446 SelectionDAG &DAG) const {
5447 SDLoc DL(Op);
5448 MVT XLenVT = Subtarget.getXLenVT();
5449 SDValue Source = Op->getOperand(0);
5450 MVT SrcVT = Source.getSimpleValueType();
5451 SDValue Mask = Op->getOperand(1);
5452 SDValue EVL = Op->getOperand(2);
5453
5454 if (SrcVT.isFixedLengthVector()) {
5455 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5456 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5457 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5458 Subtarget);
5459 SrcVT = ContainerVT;
5460 }
5461
5462 // Convert to boolean vector.
5463 if (SrcVT.getScalarType() != MVT::i1) {
5464 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5465 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5466 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5467 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5468 DAG.getUNDEF(SrcVT), Mask, EVL});
5469 }
5470
5471 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5472 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5473 // In this case, we can interpret poison as -1, so there is nothing further to do.
5474 return Res;
5475
5476 // Convert -1 to VL.
5477 SDValue SetCC =
5478 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5479 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5480 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5481}
5482
5483// While RVV has alignment restrictions, we should always be able to load as a
5484// legal equivalently-sized byte-typed vector instead. This method is
5485 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5486// the load is already correctly-aligned, it returns SDValue().
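// Illustrative example: an under-aligned load of <vscale x 2 x i32> is
// re-expressed as a load of <vscale x 8 x i8> (same size in bytes) followed
// by a bitcast back to the original type.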
5487SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5488 SelectionDAG &DAG) const {
5489 auto *Load = cast<LoadSDNode>(Op);
5490 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5491
5493 Load->getMemoryVT(),
5494 *Load->getMemOperand()))
5495 return SDValue();
5496
5497 SDLoc DL(Op);
5498 MVT VT = Op.getSimpleValueType();
5499 unsigned EltSizeBits = VT.getScalarSizeInBits();
5500 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5501 "Unexpected unaligned RVV load type");
5502 MVT NewVT =
5503 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5504 assert(NewVT.isValid() &&
5505 "Expecting equally-sized RVV vector types to be legal");
5506 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5507 Load->getPointerInfo(), Load->getOriginalAlign(),
5508 Load->getMemOperand()->getFlags());
5509 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5510}
5511
5512// While RVV has alignment restrictions, we should always be able to store as a
5513// legal equivalently-sized byte-typed vector instead. This method is
5514 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5515// returns SDValue() if the store is already correctly aligned.
5516SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5517 SelectionDAG &DAG) const {
5518 auto *Store = cast<StoreSDNode>(Op);
5519 assert(Store && Store->getValue().getValueType().isVector() &&
5520 "Expected vector store");
5521
5522 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5523 Store->getMemoryVT(),
5524 *Store->getMemOperand()))
5525 return SDValue();
5526
5527 SDLoc DL(Op);
5528 SDValue StoredVal = Store->getValue();
5529 MVT VT = StoredVal.getSimpleValueType();
5530 unsigned EltSizeBits = VT.getScalarSizeInBits();
5531 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5532 "Unexpected unaligned RVV store type");
5533 MVT NewVT =
5534 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5535 assert(NewVT.isValid() &&
5536 "Expecting equally-sized RVV vector types to be legal");
5537 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5538 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5539 Store->getPointerInfo(), Store->getOriginalAlign(),
5540 Store->getMemOperand()->getFlags());
5541}
5542
5543 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5544 const RISCVSubtarget &Subtarget) {
5545 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5546
5547 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5548
5549 // All simm32 constants should be handled by isel.
5550 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5551 // this check redundant, but small immediates are common so this check
5552 // should have better compile time.
5553 if (isInt<32>(Imm))
5554 return Op;
5555
5556 // We only need to cost the immediate, if constant pool lowering is enabled.
5557 if (!Subtarget.useConstantPoolForLargeInts())
5558 return Op;
5559
5561 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5562 return Op;
5563
5564 // Optimizations below are disabled for opt size. If we're optimizing for
5565 // size, use a constant pool.
5566 if (DAG.shouldOptForSize())
5567 return SDValue();
5568
5569 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
5570 // do that if it will avoid a constant pool.
5571 // It will require an extra temporary register though.
5572 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5573 // low and high 32 bits are the same and bit 31 and 63 are set.
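// Illustrative example: Imm = 0x8000123480001234 has identical low and high
// halves with bits 31 and 63 set. With Zba it can be built as
//   X = LUI/ADDI 0x80001234; result = ADD_UW(X, SLLI(X, 32))
// which avoids a constant pool load at the cost of one extra temporary.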
5574 unsigned ShiftAmt, AddOpc;
5575 RISCVMatInt::InstSeq SeqLo =
5576 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5577 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5578 return Op;
5579
5580 return SDValue();
5581}
5582
5583 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5584 const RISCVSubtarget &Subtarget) {
5585 SDLoc dl(Op);
5586 AtomicOrdering FenceOrdering =
5587 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5588 SyncScope::ID FenceSSID =
5589 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5590
5591 if (Subtarget.hasStdExtZtso()) {
5592 // The only fence that needs an instruction is a sequentially-consistent
5593 // cross-thread fence.
5594 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5595 FenceSSID == SyncScope::System)
5596 return Op;
5597
5598 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5599 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5600 }
5601
5602 // singlethread fences only synchronize with signal handlers on the same
5603 // thread and thus only need to preserve instruction order, not actually
5604 // enforce memory ordering.
5605 if (FenceSSID == SyncScope::SingleThread)
5606 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5607 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5608
5609 return Op;
5610}
5611
5612SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5613 SelectionDAG &DAG) const {
5614 SDLoc DL(Op);
5615 MVT VT = Op.getSimpleValueType();
5616 MVT XLenVT = Subtarget.getXLenVT();
5617 unsigned Check = Op.getConstantOperandVal(1);
5618 unsigned TDCMask = 0;
5619 if (Check & fcSNan)
5620 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5621 if (Check & fcQNan)
5622 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5623 if (Check & fcPosInf)
5624 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5625 if (Check & fcNegInf)
5626 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5627 if (Check & fcPosNormal)
5628 TDCMask |= RISCV::FPMASK_Positive_Normal;
5629 if (Check & fcNegNormal)
5630 TDCMask |= RISCV::FPMASK_Negative_Normal;
5631 if (Check & fcPosSubnormal)
5632 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5633 if (Check & fcNegSubnormal)
5634 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5635 if (Check & fcPosZero)
5636 TDCMask |= RISCV::FPMASK_Positive_Zero;
5637 if (Check & fcNegZero)
5638 TDCMask |= RISCV::FPMASK_Negative_Zero;
5639
5640 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5641
5642 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5643
5644 if (VT.isVector()) {
5645 SDValue Op0 = Op.getOperand(0);
5646 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5647
5648 if (VT.isScalableVector()) {
5649 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5650 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5651 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5652 Mask = Op.getOperand(2);
5653 VL = Op.getOperand(3);
5654 }
5655 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5656 VL, Op->getFlags());
5657 if (IsOneBitMask)
5658 return DAG.getSetCC(DL, VT, FPCLASS,
5659 DAG.getConstant(TDCMask, DL, DstVT),
5660 ISD::SETEQ);
5661 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5662 DAG.getConstant(TDCMask, DL, DstVT));
5663 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5664 ISD::SETNE);
5665 }
5666
5667 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5668 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5669 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5670 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5671 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5672 Mask = Op.getOperand(2);
5673 MVT MaskContainerVT =
5674 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5675 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5676 VL = Op.getOperand(3);
5677 }
5678 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5679
5680 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5681 Mask, VL, Op->getFlags());
5682
5683 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5684 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5685 if (IsOneBitMask) {
5686 SDValue VMSEQ =
5687 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5688 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5689 DAG.getUNDEF(ContainerVT), Mask, VL});
5690 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5691 }
5692 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5693 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5694
5695 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5696 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5697 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5698
5699 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5700 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5701 DAG.getUNDEF(ContainerVT), Mask, VL});
5702 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5703 }
5704
5705 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5706 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5707 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5708 ISD::SETNE);
5709 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5710}
5711
5712// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5713// operations propagate nans.
5714 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5715 const RISCVSubtarget &Subtarget) {
5716 SDLoc DL(Op);
5717 MVT VT = Op.getSimpleValueType();
5718
5719 SDValue X = Op.getOperand(0);
5720 SDValue Y = Op.getOperand(1);
5721
5722 if (!VT.isVector()) {
5723 MVT XLenVT = Subtarget.getXLenVT();
5724
5725 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5726 // ensures that when one input is a nan, the other will also be a nan
5727 // allowing the nan to propagate. If both inputs are nan, this will swap the
5728 // inputs which is harmless.
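// Illustrative example: if X is a NaN and Y = 2.0, the first select replaces
// Y with X, so fmax sees two NaN operands and returns a NaN, matching the
// fmaximum semantics; if an operand is known never to be NaN, no select is
// emitted for it at all.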
5729
5730 SDValue NewY = Y;
5731 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5732 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5733 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5734 }
5735
5736 SDValue NewX = X;
5737 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5738 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5739 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5740 }
5741
5742 unsigned Opc =
5743 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5744 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5745 }
5746
5747 // Check for NaNs before converting the fixed-length vectors to scalable vectors.
5748 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5749 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5750
5751 MVT ContainerVT = VT;
5752 if (VT.isFixedLengthVector()) {
5753 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5754 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5755 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5756 }
5757
5758 SDValue Mask, VL;
5759 if (Op->isVPOpcode()) {
5760 Mask = Op.getOperand(2);
5761 if (VT.isFixedLengthVector())
5762 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5763 Subtarget);
5764 VL = Op.getOperand(3);
5765 } else {
5766 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5767 }
5768
5769 SDValue NewY = Y;
5770 if (!XIsNeverNan) {
5771 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5772 {X, X, DAG.getCondCode(ISD::SETOEQ),
5773 DAG.getUNDEF(ContainerVT), Mask, VL});
5774 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5775 DAG.getUNDEF(ContainerVT), VL);
5776 }
5777
5778 SDValue NewX = X;
5779 if (!YIsNeverNan) {
5780 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5781 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5782 DAG.getUNDEF(ContainerVT), Mask, VL});
5783 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5784 DAG.getUNDEF(ContainerVT), VL);
5785 }
5786
5787 unsigned Opc =
5788 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5789 ? RISCVISD::VFMAX_VL
5790 : RISCVISD::VFMIN_VL;
5791 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5792 DAG.getUNDEF(ContainerVT), Mask, VL);
5793 if (VT.isFixedLengthVector())
5794 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5795 return Res;
5796}
5797
5798/// Get a RISC-V target specified VL op for a given SDNode.
5799static unsigned getRISCVVLOp(SDValue Op) {
5800#define OP_CASE(NODE) \
5801 case ISD::NODE: \
5802 return RISCVISD::NODE##_VL;
5803#define VP_CASE(NODE) \
5804 case ISD::VP_##NODE: \
5805 return RISCVISD::NODE##_VL;
5806 // clang-format off
5807 switch (Op.getOpcode()) {
5808 default:
5809 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5810 OP_CASE(ADD)
5811 OP_CASE(SUB)
5812 OP_CASE(MUL)
5813 OP_CASE(MULHS)
5814 OP_CASE(MULHU)
5815 OP_CASE(SDIV)
5816 OP_CASE(SREM)
5817 OP_CASE(UDIV)
5818 OP_CASE(UREM)
5819 OP_CASE(SHL)
5820 OP_CASE(SRA)
5821 OP_CASE(SRL)
5822 OP_CASE(ROTL)
5823 OP_CASE(ROTR)
5824 OP_CASE(BSWAP)
5825 OP_CASE(CTTZ)
5826 OP_CASE(CTLZ)
5827 OP_CASE(CTPOP)
5828 OP_CASE(BITREVERSE)
5829 OP_CASE(SADDSAT)
5830 OP_CASE(UADDSAT)
5831 OP_CASE(SSUBSAT)
5832 OP_CASE(USUBSAT)
5833 OP_CASE(AVGFLOORS)
5834 OP_CASE(AVGFLOORU)
5835 OP_CASE(AVGCEILS)
5836 OP_CASE(AVGCEILU)
5837 OP_CASE(FADD)
5838 OP_CASE(FSUB)
5839 OP_CASE(FMUL)
5840 OP_CASE(FDIV)
5841 OP_CASE(FNEG)
5842 OP_CASE(FABS)
5843 OP_CASE(FSQRT)
5844 OP_CASE(SMIN)
5845 OP_CASE(SMAX)
5846 OP_CASE(UMIN)
5847 OP_CASE(UMAX)
5848 OP_CASE(STRICT_FADD)
5849 OP_CASE(STRICT_FSUB)
5850 OP_CASE(STRICT_FMUL)
5851 OP_CASE(STRICT_FDIV)
5852 OP_CASE(STRICT_FSQRT)
5853 VP_CASE(ADD) // VP_ADD
5854 VP_CASE(SUB) // VP_SUB
5855 VP_CASE(MUL) // VP_MUL
5856 VP_CASE(SDIV) // VP_SDIV
5857 VP_CASE(SREM) // VP_SREM
5858 VP_CASE(UDIV) // VP_UDIV
5859 VP_CASE(UREM) // VP_UREM
5860 VP_CASE(SHL) // VP_SHL
5861 VP_CASE(FADD) // VP_FADD
5862 VP_CASE(FSUB) // VP_FSUB
5863 VP_CASE(FMUL) // VP_FMUL
5864 VP_CASE(FDIV) // VP_FDIV
5865 VP_CASE(FNEG) // VP_FNEG
5866 VP_CASE(FABS) // VP_FABS
5867 VP_CASE(SMIN) // VP_SMIN
5868 VP_CASE(SMAX) // VP_SMAX
5869 VP_CASE(UMIN) // VP_UMIN
5870 VP_CASE(UMAX) // VP_UMAX
5871 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5872 VP_CASE(SETCC) // VP_SETCC
5873 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5874 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5875 VP_CASE(BITREVERSE) // VP_BITREVERSE
5876 VP_CASE(SADDSAT) // VP_SADDSAT
5877 VP_CASE(UADDSAT) // VP_UADDSAT
5878 VP_CASE(SSUBSAT) // VP_SSUBSAT
5879 VP_CASE(USUBSAT) // VP_USUBSAT
5880 VP_CASE(BSWAP) // VP_BSWAP
5881 VP_CASE(CTLZ) // VP_CTLZ
5882 VP_CASE(CTTZ) // VP_CTTZ
5883 VP_CASE(CTPOP) // VP_CTPOP
5884 case ISD::CTLZ_ZERO_UNDEF:
5885 case ISD::VP_CTLZ_ZERO_UNDEF:
5886 return RISCVISD::CTLZ_VL;
5887 case ISD::CTTZ_ZERO_UNDEF:
5888 case ISD::VP_CTTZ_ZERO_UNDEF:
5889 return RISCVISD::CTTZ_VL;
5890 case ISD::FMA:
5891 case ISD::VP_FMA:
5892 return RISCVISD::VFMADD_VL;
5893 case ISD::STRICT_FMA:
5894 return RISCVISD::STRICT_VFMADD_VL;
5895 case ISD::AND:
5896 case ISD::VP_AND:
5897 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5898 return RISCVISD::VMAND_VL;
5899 return RISCVISD::AND_VL;
5900 case ISD::OR:
5901 case ISD::VP_OR:
5902 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5903 return RISCVISD::VMOR_VL;
5904 return RISCVISD::OR_VL;
5905 case ISD::XOR:
5906 case ISD::VP_XOR:
5907 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5908 return RISCVISD::VMXOR_VL;
5909 return RISCVISD::XOR_VL;
5910 case ISD::VP_SELECT:
5911 case ISD::VP_MERGE:
5912 return RISCVISD::VMERGE_VL;
5913 case ISD::VP_SRA:
5914 return RISCVISD::SRA_VL;
5915 case ISD::VP_SRL:
5916 return RISCVISD::SRL_VL;
5917 case ISD::VP_SQRT:
5918 return RISCVISD::FSQRT_VL;
5919 case ISD::VP_SIGN_EXTEND:
5920 return RISCVISD::VSEXT_VL;
5921 case ISD::VP_ZERO_EXTEND:
5922 return RISCVISD::VZEXT_VL;
5923 case ISD::VP_FP_TO_SINT:
5924 return RISCVISD::VFCVT_RTZ_X_F_VL;
5925 case ISD::VP_FP_TO_UINT:
5926 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5927 case ISD::FMINNUM:
5928 case ISD::VP_FMINNUM:
5929 return RISCVISD::VFMIN_VL;
5930 case ISD::FMAXNUM:
5931 case ISD::VP_FMAXNUM:
5932 return RISCVISD::VFMAX_VL;
5933 case ISD::LRINT:
5934 case ISD::VP_LRINT:
5935 case ISD::LLRINT:
5936 case ISD::VP_LLRINT:
5937 return RISCVISD::VFCVT_X_F_VL;
5938 }
5939 // clang-format on
5940#undef OP_CASE
5941#undef VP_CASE
5942}
5943
5944/// Return true if a RISC-V target specified op has a passthru operand.
5945static bool hasPassthruOp(unsigned Opcode) {
5946 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5948 "not a RISC-V target specific op");
5950 130 &&
5953 21 &&
5954 "adding target specific op should update this function");
5955 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5956 return true;
5957 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5958 return true;
5959 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5960 return true;
5961 if (Opcode == RISCVISD::SETCC_VL)
5962 return true;
5963 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5964 return true;
5965 if (Opcode == RISCVISD::VMERGE_VL)
5966 return true;
5967 return false;
5968}
5969
5970/// Return true if a RISC-V target specified op has a mask operand.
5971static bool hasMaskOp(unsigned Opcode) {
5972 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5974 "not a RISC-V target specific op");
5976 130 &&
5979 21 &&
5980 "adding target specific op should update this function");
5981 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5982 return true;
5983 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5984 return true;
5985 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5987 return true;
5988 return false;
5989}
5990
5992 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5993 SDLoc DL(Op);
5994
5995 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5996 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5997
5998 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5999 if (!Op.getOperand(j).getValueType().isVector()) {
6000 LoOperands[j] = Op.getOperand(j);
6001 HiOperands[j] = Op.getOperand(j);
6002 continue;
6003 }
6004 std::tie(LoOperands[j], HiOperands[j]) =
6005 DAG.SplitVector(Op.getOperand(j), DL);
6006 }
6007
6008 SDValue LoRes =
6009 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6010 SDValue HiRes =
6011 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6012
6013 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6014}
6015
6017 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6018 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6019 SDLoc DL(Op);
6020
6021 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6022 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6023
6024 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6025 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6026 std::tie(LoOperands[j], HiOperands[j]) =
6027 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6028 continue;
6029 }
6030 if (!Op.getOperand(j).getValueType().isVector()) {
6031 LoOperands[j] = Op.getOperand(j);
6032 HiOperands[j] = Op.getOperand(j);
6033 continue;
6034 }
6035 std::tie(LoOperands[j], HiOperands[j]) =
6036 DAG.SplitVector(Op.getOperand(j), DL);
6037 }
6038
6039 SDValue LoRes =
6040 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6041 SDValue HiRes =
6042 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6043
6044 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6045}
6046
6048 SDLoc DL(Op);
6049
6050 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6051 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6052 auto [EVLLo, EVLHi] =
6053 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6054
6055 SDValue ResLo =
6056 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6057 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6058 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6059 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6060}
6061
6063
6064 assert(Op->isStrictFPOpcode());
6065
6066 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6067
6068 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6069 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6070
6071 SDLoc DL(Op);
6072
6073 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6074 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6075
6076 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6077 if (!Op.getOperand(j).getValueType().isVector()) {
6078 LoOperands[j] = Op.getOperand(j);
6079 HiOperands[j] = Op.getOperand(j);
6080 continue;
6081 }
6082 std::tie(LoOperands[j], HiOperands[j]) =
6083 DAG.SplitVector(Op.getOperand(j), DL);
6084 }
6085
6086 SDValue LoRes =
6087 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6088 HiOperands[0] = LoRes.getValue(1);
6089 SDValue HiRes =
6090 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6091
6092 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6093 LoRes.getValue(0), HiRes.getValue(0));
6094 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6095}
6096
6097 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6098 SelectionDAG &DAG) const {
6099 switch (Op.getOpcode()) {
6100 default:
6101 report_fatal_error("unimplemented operand");
6102 case ISD::ATOMIC_FENCE:
6103 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6104 case ISD::GlobalAddress:
6105 return lowerGlobalAddress(Op, DAG);
6106 case ISD::BlockAddress:
6107 return lowerBlockAddress(Op, DAG);
6108 case ISD::ConstantPool:
6109 return lowerConstantPool(Op, DAG);
6110 case ISD::JumpTable:
6111 return lowerJumpTable(Op, DAG);
6112 case ISD::GlobalTLSAddress:
6113 return lowerGlobalTLSAddress(Op, DAG);
6114 case ISD::Constant:
6115 return lowerConstant(Op, DAG, Subtarget);
6116 case ISD::SELECT:
6117 return lowerSELECT(Op, DAG);
6118 case ISD::BRCOND:
6119 return lowerBRCOND(Op, DAG);
6120 case ISD::VASTART:
6121 return lowerVASTART(Op, DAG);
6122 case ISD::FRAMEADDR:
6123 return lowerFRAMEADDR(Op, DAG);
6124 case ISD::RETURNADDR:
6125 return lowerRETURNADDR(Op, DAG);
6126 case ISD::SHL_PARTS:
6127 return lowerShiftLeftParts(Op, DAG);
6128 case ISD::SRA_PARTS:
6129 return lowerShiftRightParts(Op, DAG, true);
6130 case ISD::SRL_PARTS:
6131 return lowerShiftRightParts(Op, DAG, false);
6132 case ISD::ROTL:
6133 case ISD::ROTR:
6134 if (Op.getValueType().isFixedLengthVector()) {
6135 assert(Subtarget.hasStdExtZvkb());
6136 return lowerToScalableOp(Op, DAG);
6137 }
6138 assert(Subtarget.hasVendorXTHeadBb() &&
6139 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6140 "Unexpected custom legalization");
6141 // XTHeadBb only supports rotate by constant.
6142 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6143 return SDValue();
6144 return Op;
6145 case ISD::BITCAST: {
6146 SDLoc DL(Op);
6147 EVT VT = Op.getValueType();
6148 SDValue Op0 = Op.getOperand(0);
6149 EVT Op0VT = Op0.getValueType();
6150 MVT XLenVT = Subtarget.getXLenVT();
6151 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6152 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6153 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6154 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6155 return FPConv;
6156 }
6157 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6158 Subtarget.hasStdExtZfbfmin()) {
6159 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6160 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6161 return FPConv;
6162 }
6163 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6164 Subtarget.hasStdExtFOrZfinx()) {
6165 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6166 SDValue FPConv =
6167 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6168 return FPConv;
6169 }
6170 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6171 SDValue Lo, Hi;
6172 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6173 SDValue RetReg =
6174 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6175 return RetReg;
6176 }
6177
6178 // Consider other scalar<->scalar casts as legal if the types are legal.
6179 // Otherwise expand them.
6180 if (!VT.isVector() && !Op0VT.isVector()) {
6181 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6182 return Op;
6183 return SDValue();
6184 }
6185
6186 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6187 "Unexpected types");
6188
6189 if (VT.isFixedLengthVector()) {
6190 // We can handle fixed length vector bitcasts with a simple replacement
6191 // in isel.
6192 if (Op0VT.isFixedLengthVector())
6193 return Op;
6194 // When bitcasting from scalar to fixed-length vector, insert the scalar
6195 // into a one-element vector of the result type, and perform a vector
6196 // bitcast.
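// Illustrative example: a bitcast from i64 to v4i16 becomes an
// INSERT_VECTOR_ELT of the scalar into lane 0 of an undef v1i64, followed
// by a v1i64 -> v4i16 vector bitcast.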
6197 if (!Op0VT.isVector()) {
6198 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6199 if (!isTypeLegal(BVT))
6200 return SDValue();
6201 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6202 DAG.getUNDEF(BVT), Op0,
6203 DAG.getVectorIdxConstant(0, DL)));
6204 }
6205 return SDValue();
6206 }
6207 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6208 // thus: bitcast the vector to a one-element vector type whose element type
6209 // is the same as the result type, and extract the first element.
6210 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6211 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6212 if (!isTypeLegal(BVT))
6213 return SDValue();
6214 SDValue BVec = DAG.getBitcast(BVT, Op0);
6215 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6216 DAG.getVectorIdxConstant(0, DL));
6217 }
6218 return SDValue();
6219 }
6220 case ISD::INTRINSIC_WO_CHAIN:
6221 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6222 case ISD::INTRINSIC_W_CHAIN:
6223 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6224 case ISD::INTRINSIC_VOID:
6225 return LowerINTRINSIC_VOID(Op, DAG);
6226 case ISD::IS_FPCLASS:
6227 return LowerIS_FPCLASS(Op, DAG);
6228 case ISD::BITREVERSE: {
6229 MVT VT = Op.getSimpleValueType();
6230 if (VT.isFixedLengthVector()) {
6231 assert(Subtarget.hasStdExtZvbb());
6232 return lowerToScalableOp(Op, DAG);
6233 }
6234 SDLoc DL(Op);
6235 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6236 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6237 // Expand bitreverse to a bswap(rev8) followed by brev8.
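// Illustrative example: on a 64-bit value, rev8 (bswap) reverses the byte
// order and brev8 reverses the bits within each byte, so their composition
// reverses all 64 bits; e.g. 0x01 -> rev8 -> 0x0100000000000000 -> brev8 ->
// 0x8000000000000000.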
6238 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6239 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6240 }
6241 case ISD::TRUNCATE:
6242 case ISD::TRUNCATE_SSAT_S:
6243 case ISD::TRUNCATE_USAT_U:
6244 // Only custom-lower vector truncates
6245 if (!Op.getSimpleValueType().isVector())
6246 return Op;
6247 return lowerVectorTruncLike(Op, DAG);
6248 case ISD::ANY_EXTEND:
6249 case ISD::ZERO_EXTEND:
6250 if (Op.getOperand(0).getValueType().isVector() &&
6251 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6252 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6253 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6254 case ISD::SIGN_EXTEND:
6255 if (Op.getOperand(0).getValueType().isVector() &&
6256 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6257 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6258 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6259 case ISD::SPLAT_VECTOR_PARTS:
6260 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6261 case ISD::INSERT_VECTOR_ELT:
6262 return lowerINSERT_VECTOR_ELT(Op, DAG);
6263 case ISD::EXTRACT_VECTOR_ELT:
6264 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6265 case ISD::SCALAR_TO_VECTOR: {
6266 MVT VT = Op.getSimpleValueType();
6267 SDLoc DL(Op);
6268 SDValue Scalar = Op.getOperand(0);
6269 if (VT.getVectorElementType() == MVT::i1) {
6270 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6271 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6272 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6273 }
6274 MVT ContainerVT = VT;
6275 if (VT.isFixedLengthVector())
6276 ContainerVT = getContainerForFixedLengthVector(VT);
6277 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6278 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6279 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6280 DAG.getUNDEF(ContainerVT), Scalar, VL);
6281 if (VT.isFixedLengthVector())
6282 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6283 return V;
6284 }
6285 case ISD::VSCALE: {
6286 MVT XLenVT = Subtarget.getXLenVT();
6287 MVT VT = Op.getSimpleValueType();
6288 SDLoc DL(Op);
6289 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6290 // We define our scalable vector types for lmul=1 to use a 64 bit known
6291 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6292 // vscale as VLENB / 8.
6293 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6294 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6295 report_fatal_error("Support for VLEN==32 is incomplete.");
6296 // We assume VLENB is a multiple of 8. We manually choose the best shift
6297 // here because SimplifyDemandedBits isn't always able to simplify it.
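// Worked example (illustrative): vscale equals VLENB / 8, so a request for
// vscale * 4 becomes VLENB >> 1 (Log2 = 2 < 3 below), and vscale * 24
// becomes VLENB * 3 via the multiple-of-8 path.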
6298 uint64_t Val = Op.getConstantOperandVal(0);
6299 if (isPowerOf2_64(Val)) {
6300 uint64_t Log2 = Log2_64(Val);
6301 if (Log2 < 3)
6302 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6303 DAG.getConstant(3 - Log2, DL, VT));
6304 else if (Log2 > 3)
6305 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6306 DAG.getConstant(Log2 - 3, DL, XLenVT));
6307 } else if ((Val % 8) == 0) {
6308 // If the multiplier is a multiple of 8, scale it down to avoid needing
6309 // to shift the VLENB value.
6310 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6311 DAG.getConstant(Val / 8, DL, XLenVT));
6312 } else {
6313 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6314 DAG.getConstant(3, DL, XLenVT));
6315 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6316 DAG.getConstant(Val, DL, XLenVT));
6317 }
6318 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6319 }
6320 case ISD::FPOWI: {
6321 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6322 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6323 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6324 Op.getOperand(1).getValueType() == MVT::i32) {
6325 SDLoc DL(Op);
6326 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6327 SDValue Powi =
6328 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6329 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6330 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6331 }
6332 return SDValue();
6333 }
6334 case ISD::FMAXIMUM:
6335 case ISD::FMINIMUM:
6336 if (Op.getValueType() == MVT::nxv32f16 &&
6337 (Subtarget.hasVInstructionsF16Minimal() &&
6338 !Subtarget.hasVInstructionsF16()))
6339 return SplitVectorOp(Op, DAG);
6340 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6341 case ISD::FP_EXTEND: {
6342 SDLoc DL(Op);
6343 EVT VT = Op.getValueType();
6344 SDValue Op0 = Op.getOperand(0);
6345 EVT Op0VT = Op0.getValueType();
6346 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6347 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6348 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6349 SDValue FloatVal =
6350 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6351 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6352 }
6353
6354 if (!Op.getValueType().isVector())
6355 return Op;
6356 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6357 }
6358 case ISD::FP_ROUND: {
6359 SDLoc DL(Op);
6360 EVT VT = Op.getValueType();
6361 SDValue Op0 = Op.getOperand(0);
6362 EVT Op0VT = Op0.getValueType();
6363 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6364 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6365 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6366 Subtarget.hasStdExtDOrZdinx()) {
6367 SDValue FloatVal =
6368 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6369 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6370 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6371 }
6372
6373 if (!Op.getValueType().isVector())
6374 return Op;
6375 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6376 }
6377 case ISD::STRICT_FP_ROUND:
6378 case ISD::STRICT_FP_EXTEND:
6379 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6380 case ISD::SINT_TO_FP:
6381 case ISD::UINT_TO_FP:
6382 if (Op.getValueType().isVector() &&
6383 Op.getValueType().getScalarType() == MVT::f16 &&
6384 (Subtarget.hasVInstructionsF16Minimal() &&
6385 !Subtarget.hasVInstructionsF16())) {
6386 if (Op.getValueType() == MVT::nxv32f16)
6387 return SplitVectorOp(Op, DAG);
6388 // int -> f32
6389 SDLoc DL(Op);
6390 MVT NVT =
6391 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6392 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6393 // f32 -> f16
6394 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6395 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6396 }
6397 [[fallthrough]];
6398 case ISD::FP_TO_SINT:
6399 case ISD::FP_TO_UINT:
6400 if (SDValue Op1 = Op.getOperand(0);
6401 Op1.getValueType().isVector() &&
6402 Op1.getValueType().getScalarType() == MVT::f16 &&
6403 (Subtarget.hasVInstructionsF16Minimal() &&
6404 !Subtarget.hasVInstructionsF16())) {
6405 if (Op1.getValueType() == MVT::nxv32f16)
6406 return SplitVectorOp(Op, DAG);
6407 // f16 -> f32
6408 SDLoc DL(Op);
6409 MVT NVT = MVT::getVectorVT(MVT::f32,
6410 Op1.getValueType().getVectorElementCount());
6411 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6412 // f32 -> int
6413 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6414 }
6415 [[fallthrough]];
6416 case ISD::STRICT_FP_TO_SINT:
6417 case ISD::STRICT_FP_TO_UINT:
6418 case ISD::STRICT_SINT_TO_FP:
6419 case ISD::STRICT_UINT_TO_FP: {
6420 // RVV can only do fp<->int conversions to types half/double the size as
6421 // the source. We custom-lower any conversions that do two hops into
6422 // sequences.
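// Illustrative example: a SINT_TO_FP from <vscale x 2 x i8> to
// <vscale x 2 x f32> is more than one doubling, so it is emitted below as a
// sign extension to i16 elements followed by a single widening i16 -> f32
// convert; the analogous FP_TO_SINT case fp-extends f16 to f32 first.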
6423 MVT VT = Op.getSimpleValueType();
6424 if (!VT.isVector())
6425 return Op;
6426 SDLoc DL(Op);
6427 bool IsStrict = Op->isStrictFPOpcode();
6428 SDValue Src = Op.getOperand(0 + IsStrict);
6429 MVT EltVT = VT.getVectorElementType();
6430 MVT SrcVT = Src.getSimpleValueType();
6431 MVT SrcEltVT = SrcVT.getVectorElementType();
6432 unsigned EltSize = EltVT.getSizeInBits();
6433 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6434 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6435 "Unexpected vector element types");
6436
6437 bool IsInt2FP = SrcEltVT.isInteger();
6438 // Widening conversions
6439 if (EltSize > (2 * SrcEltSize)) {
6440 if (IsInt2FP) {
6441 // Do a regular integer sign/zero extension then convert to float.
6442 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6443 VT.getVectorElementCount());
6444 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6445 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6446 ? ISD::ZERO_EXTEND
6447 : ISD::SIGN_EXTEND;
6448 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6449 if (IsStrict)
6450 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6451 Op.getOperand(0), Ext);
6452 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6453 }
6454 // FP2Int
6455 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6456 // Do one doubling fp_extend then complete the operation by converting
6457 // to int.
6458 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6459 if (IsStrict) {
6460 auto [FExt, Chain] =
6461 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6462 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6463 }
6464 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6465 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6466 }
6467
6468 // Narrowing conversions
6469 if (SrcEltSize > (2 * EltSize)) {
6470 if (IsInt2FP) {
6471 // One narrowing int_to_fp, then an fp_round.
6472 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6473 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6474 if (IsStrict) {
6475 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6476 DAG.getVTList(InterimFVT, MVT::Other),
6477 Op.getOperand(0), Src);
6478 SDValue Chain = Int2FP.getValue(1);
6479 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6480 }
6481 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6482 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6483 }
6484 // FP2Int
6485 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6486 // representable by the integer, the result is poison.
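// Illustrative example: an FP_TO_SINT from <vscale x 2 x f64> to
// <vscale x 2 x i8> is emitted as one narrowing f64 -> i32 conversion
// followed by an i32 -> i8 truncate (itself lowered to narrowing shifts).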
6487 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6488 VT.getVectorElementCount());
6489 if (IsStrict) {
6490 SDValue FP2Int =
6491 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6492 Op.getOperand(0), Src);
6493 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6494 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6495 }
6496 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6497 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6498 }
6499
6500 // Scalable vectors can exit here. Patterns will handle equally-sized
6501 // conversions halving/doubling ones.
6502 if (!VT.isFixedLengthVector())
6503 return Op;
6504
6505 // For fixed-length vectors we lower to a custom "VL" node.
6506 unsigned RVVOpc = 0;
6507 switch (Op.getOpcode()) {
6508 default:
6509 llvm_unreachable("Impossible opcode");
6510 case ISD::FP_TO_SINT:
6511 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6512 break;
6513 case ISD::FP_TO_UINT:
6514 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6515 break;
6516 case ISD::SINT_TO_FP:
6517 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6518 break;
6519 case ISD::UINT_TO_FP:
6520 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6521 break;
6522 case ISD::STRICT_FP_TO_SINT:
6523 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6524 break;
6525 case ISD::STRICT_FP_TO_UINT:
6526 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6527 break;
6528 case ISD::STRICT_SINT_TO_FP:
6529 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6530 break;
6531 case ISD::STRICT_UINT_TO_FP:
6532 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6533 break;
6534 }
6535
6536 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6537 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6538 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6539 "Expected same element count");
6540
6541 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6542
6543 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6544 if (IsStrict) {
6545 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6546 Op.getOperand(0), Src, Mask, VL);
6547 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6548 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6549 }
6550 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6551 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6552 }
6553 case ISD::FP_TO_SINT_SAT:
6554 case ISD::FP_TO_UINT_SAT:
6555 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6556 case ISD::FP_TO_BF16: {
6557 // Custom lower to ensure the libcall return is passed in an FPR on hard
6558 // float ABIs.
6559 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6560 SDLoc DL(Op);
6561 MakeLibCallOptions CallOptions;
6562 RTLIB::Libcall LC =
6563 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6564 SDValue Res =
6565 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6566 if (Subtarget.is64Bit())
6567 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6568 return DAG.getBitcast(MVT::i32, Res);
6569 }
6570 case ISD::BF16_TO_FP: {
6571 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6572 MVT VT = Op.getSimpleValueType();
6573 SDLoc DL(Op);
6574 Op = DAG.getNode(
6575 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6576 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6577 SDValue Res = Subtarget.is64Bit()
6578 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6579 : DAG.getBitcast(MVT::f32, Op);
6580 // fp_extend if the target VT is bigger than f32.
6581 if (VT != MVT::f32)
6582 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6583 return Res;
6584 }
6585 case ISD::FP_TO_FP16: {
6586 // Custom lower to ensure the libcall return is passed in an FPR on hard
6587 // float ABIs.
6588 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6589 SDLoc DL(Op);
6590 MakeLibCallOptions CallOptions;
6591 RTLIB::Libcall LC =
6592 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6593 SDValue Res =
6594 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6595 if (Subtarget.is64Bit())
6596 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6597 return DAG.getBitcast(MVT::i32, Res);
6598 }
6599 case ISD::FP16_TO_FP: {
6600 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6601 // float ABIs.
6602 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6603 SDLoc DL(Op);
6604 MakeLibCallOptions CallOptions;
6605 SDValue Arg = Subtarget.is64Bit()
6606 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6607 Op.getOperand(0))
6608 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6609 SDValue Res =
6610 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6611 .first;
6612 return Res;
6613 }
6614 case ISD::FTRUNC:
6615 case ISD::FCEIL:
6616 case ISD::FFLOOR:
6617 case ISD::FNEARBYINT:
6618 case ISD::FRINT:
6619 case ISD::FROUND:
6620 case ISD::FROUNDEVEN:
6621 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6622 case ISD::LRINT:
6623 case ISD::LLRINT:
6624 return lowerVectorXRINT(Op, DAG, Subtarget);
6625 case ISD::VECREDUCE_ADD:
6626 case ISD::VECREDUCE_UMAX:
6627 case ISD::VECREDUCE_SMAX:
6628 case ISD::VECREDUCE_UMIN:
6629 case ISD::VECREDUCE_SMIN:
6630 return lowerVECREDUCE(Op, DAG);
6631 case ISD::VECREDUCE_AND:
6632 case ISD::VECREDUCE_OR:
6633 case ISD::VECREDUCE_XOR:
6634 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6635 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6636 return lowerVECREDUCE(Op, DAG);
6637 case ISD::VECREDUCE_FADD:
6638 case ISD::VECREDUCE_SEQ_FADD:
6639 case ISD::VECREDUCE_FMIN:
6640 case ISD::VECREDUCE_FMAX:
6641 case ISD::VECREDUCE_FMAXIMUM:
6642 case ISD::VECREDUCE_FMINIMUM:
6643 return lowerFPVECREDUCE(Op, DAG);
6644 case ISD::VP_REDUCE_ADD:
6645 case ISD::VP_REDUCE_UMAX:
6646 case ISD::VP_REDUCE_SMAX:
6647 case ISD::VP_REDUCE_UMIN:
6648 case ISD::VP_REDUCE_SMIN:
6649 case ISD::VP_REDUCE_FADD:
6650 case ISD::VP_REDUCE_SEQ_FADD:
6651 case ISD::VP_REDUCE_FMIN:
6652 case ISD::VP_REDUCE_FMAX:
6653 case ISD::VP_REDUCE_FMINIMUM:
6654 case ISD::VP_REDUCE_FMAXIMUM:
6655 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6656 (Subtarget.hasVInstructionsF16Minimal() &&
6657 !Subtarget.hasVInstructionsF16()))
6658 return SplitVectorReductionOp(Op, DAG);
6659 return lowerVPREDUCE(Op, DAG);
6660 case ISD::VP_REDUCE_AND:
6661 case ISD::VP_REDUCE_OR:
6662 case ISD::VP_REDUCE_XOR:
6663 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6664 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6665 return lowerVPREDUCE(Op, DAG);
6666 case ISD::VP_CTTZ_ELTS:
6667 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6668 return lowerVPCttzElements(Op, DAG);
6669 case ISD::UNDEF: {
6670 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6671 return convertFromScalableVector(Op.getSimpleValueType(),
6672 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6673 }
6674 case ISD::INSERT_SUBVECTOR:
6675 return lowerINSERT_SUBVECTOR(Op, DAG);
6676 case ISD::EXTRACT_SUBVECTOR:
6677 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6678 case ISD::VECTOR_DEINTERLEAVE:
6679 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6680 case ISD::VECTOR_INTERLEAVE:
6681 return lowerVECTOR_INTERLEAVE(Op, DAG);
6682 case ISD::STEP_VECTOR:
6683 return lowerSTEP_VECTOR(Op, DAG);
6684 case ISD::VECTOR_REVERSE:
6685 return lowerVECTOR_REVERSE(Op, DAG);
6686 case ISD::VECTOR_SPLICE:
6687 return lowerVECTOR_SPLICE(Op, DAG);
6688 case ISD::BUILD_VECTOR:
6689 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6690 case ISD::SPLAT_VECTOR:
6691 if ((Op.getValueType().getScalarType() == MVT::f16 &&
6692 (Subtarget.hasVInstructionsF16Minimal() &&
6693 Subtarget.hasStdExtZfhminOrZhinxmin() &&
6694 !Subtarget.hasVInstructionsF16())) ||
6695 (Op.getValueType().getScalarType() == MVT::bf16 &&
6696 (Subtarget.hasVInstructionsBF16Minimal() &&
6697 Subtarget.hasStdExtZfbfmin()))) {
6698 if (Op.getValueType() == MVT::nxv32f16 ||
6699 Op.getValueType() == MVT::nxv32bf16)
6700 return SplitVectorOp(Op, DAG);
6701 SDLoc DL(Op);
6702 SDValue NewScalar =
6703 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6704 SDValue NewSplat = DAG.getNode(
6705 ISD::SPLAT_VECTOR, DL,
6706 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6707 NewScalar);
6708 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6709 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6710 }
6711 if (Op.getValueType().getVectorElementType() == MVT::i1)
6712 return lowerVectorMaskSplat(Op, DAG);
6713 return SDValue();
6714 case ISD::VECTOR_SHUFFLE:
6715 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6716 case ISD::CONCAT_VECTORS: {
6717 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6718 // better than going through the stack, as the default expansion does.
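// Illustrative example: concat_vectors(v4i32 A, v4i32 B) becomes
// insert_subvector(insert_subvector(undef:v8i32, A, 0), B, 4), i.e. each
// operand is inserted at index i * NumOpElts of an undef result vector.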
6719 SDLoc DL(Op);
6720 MVT VT = Op.getSimpleValueType();
6721 MVT ContainerVT = VT;
6722 if (VT.isFixedLengthVector())
6723 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6724
6725 // Recursively split concat_vectors with more than 2 operands:
6726 //
6727 // concat_vector op1, op2, op3, op4
6728 // ->
6729 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6730 //
6731 // This reduces the length of the chain of vslideups and allows us to
6732 // perform the vslideups at a smaller LMUL, limited to MF2.
6733 if (Op.getNumOperands() > 2 &&
6734 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6735 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6736 assert(isPowerOf2_32(Op.getNumOperands()));
6737 size_t HalfNumOps = Op.getNumOperands() / 2;
6738 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6739 Op->ops().take_front(HalfNumOps));
6740 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6741 Op->ops().drop_front(HalfNumOps));
6742 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6743 }
6744
6745 unsigned NumOpElts =
6746 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6747 SDValue Vec = DAG.getUNDEF(VT);
6748 for (const auto &OpIdx : enumerate(Op->ops())) {
6749 SDValue SubVec = OpIdx.value();
6750 // Don't insert undef subvectors.
6751 if (SubVec.isUndef())
6752 continue;
6753 Vec =
6754 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6755 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6756 }
6757 return Vec;
6758 }
6759 case ISD::LOAD:
6760 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6761 return V;
6762 if (Op.getValueType().isFixedLengthVector())
6763 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6764 return Op;
6765 case ISD::STORE:
6766 if (auto V = expandUnalignedRVVStore(Op, DAG))
6767 return V;
6768 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6769 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6770 return Op;
6771 case ISD::MLOAD:
6772 case ISD::VP_LOAD:
6773 return lowerMaskedLoad(Op, DAG);
6774 case ISD::MSTORE:
6775 case ISD::VP_STORE:
6776 return lowerMaskedStore(Op, DAG);
6777 case ISD::SELECT_CC: {
6778 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6779 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6780 // into separate SETCC+SELECT just like LegalizeDAG.
6781 SDValue Tmp1 = Op.getOperand(0);
6782 SDValue Tmp2 = Op.getOperand(1);
6783 SDValue True = Op.getOperand(2);
6784 SDValue False = Op.getOperand(3);
6785 EVT VT = Op.getValueType();
6786 SDValue CC = Op.getOperand(4);
6787 EVT CmpVT = Tmp1.getValueType();
6788 EVT CCVT =
6789 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6790 SDLoc DL(Op);
6791 SDValue Cond =
6792 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6793 return DAG.getSelect(DL, VT, Cond, True, False);
6794 }
6795 case ISD::SETCC: {
6796 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6797 if (OpVT.isScalarInteger()) {
6798 MVT VT = Op.getSimpleValueType();
6799 SDValue LHS = Op.getOperand(0);
6800 SDValue RHS = Op.getOperand(1);
6801 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6802 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6803 "Unexpected CondCode");
6804
6805 SDLoc DL(Op);
6806
6807 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6808 // convert this to the equivalent of (set(u)ge X, C+1) by using
6809 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6810 // in a register.
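// For example, (setgt X, 5) becomes (xori (slti X, 6), 1): X > 5 is the same
// as !(X < 6), and 6 still fits in the 12-bit immediate field of slti, so no
// separate constant materialization is needed.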
6811 if (isa<ConstantSDNode>(RHS)) {
6812 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6813 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6814 // If this is an unsigned compare and the constant is -1, incrementing
6815 // the constant would change behavior. The result should be false.
6816 if (CCVal == ISD::SETUGT && Imm == -1)
6817 return DAG.getConstant(0, DL, VT);
6818 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6819 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6820 SDValue SetCC = DAG.getSetCC(
6821 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
6822 return DAG.getLogicalNOT(DL, SetCC, VT);
6823 }
6824 }
6825
6826 // Not a constant we could handle, swap the operands and condition code to
6827 // SETLT/SETULT.
6828 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6829 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6830 }
6831
6832 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6833 (Subtarget.hasVInstructionsF16Minimal() &&
6834 !Subtarget.hasVInstructionsF16()))
6835 return SplitVectorOp(Op, DAG);
6836
6837 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6838 }
6839 case ISD::ADD:
6840 case ISD::SUB:
6841 case ISD::MUL:
6842 case ISD::MULHS:
6843 case ISD::MULHU:
6844 case ISD::AND:
6845 case ISD::OR:
6846 case ISD::XOR:
6847 case ISD::SDIV:
6848 case ISD::SREM:
6849 case ISD::UDIV:
6850 case ISD::UREM:
6851 case ISD::BSWAP:
6852 case ISD::CTPOP:
6853 return lowerToScalableOp(Op, DAG);
6854 case ISD::SHL:
6855 case ISD::SRA:
6856 case ISD::SRL:
6857 if (Op.getSimpleValueType().isFixedLengthVector())
6858 return lowerToScalableOp(Op, DAG);
6859 // This can be called for an i32 shift amount that needs to be promoted.
6860 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6861 "Unexpected custom legalisation");
6862 return SDValue();
6863 case ISD::FADD:
6864 case ISD::FSUB:
6865 case ISD::FMUL:
6866 case ISD::FDIV:
6867 case ISD::FNEG:
6868 case ISD::FABS:
6869 case ISD::FSQRT:
6870 case ISD::FMA:
6871 case ISD::FMINNUM:
6872 case ISD::FMAXNUM:
6873 if (Op.getValueType() == MVT::nxv32f16 &&
6874 (Subtarget.hasVInstructionsF16Minimal() &&
6875 !Subtarget.hasVInstructionsF16()))
6876 return SplitVectorOp(Op, DAG);
6877 [[fallthrough]];
6878 case ISD::AVGFLOORS:
6879 case ISD::AVGFLOORU:
6880 case ISD::AVGCEILS:
6881 case ISD::AVGCEILU:
6882 case ISD::SMIN:
6883 case ISD::SMAX:
6884 case ISD::UMIN:
6885 case ISD::UMAX:
6886 return lowerToScalableOp(Op, DAG);
6887 case ISD::UADDSAT:
6888 case ISD::USUBSAT:
6889 return lowerToScalableOp(Op, DAG);
6890 case ISD::SADDSAT:
6891 case ISD::SSUBSAT:
6892 return lowerToScalableOp(Op, DAG);
6893 case ISD::ABDS:
6894 case ISD::ABDU: {
6895 SDLoc dl(Op);
6896 EVT VT = Op->getValueType(0);
6897 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6898 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6899 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6900
6901 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6902 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
6903 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6904 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6905 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6906 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6907 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6908 }
6909 case ISD::ABS:
6910 case ISD::VP_ABS:
6911 return lowerABS(Op, DAG);
6912 case ISD::CTLZ:
6913 case ISD::CTLZ_ZERO_UNDEF:
6914 case ISD::CTTZ:
6915 case ISD::CTTZ_ZERO_UNDEF:
6916 if (Subtarget.hasStdExtZvbb())
6917 return lowerToScalableOp(Op, DAG);
6918 assert(Op.getOpcode() != ISD::CTTZ);
6919 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6920 case ISD::VSELECT:
6921 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6922 case ISD::FCOPYSIGN:
6923 if (Op.getValueType() == MVT::nxv32f16 &&
6924 (Subtarget.hasVInstructionsF16Minimal() &&
6925 !Subtarget.hasVInstructionsF16()))
6926 return SplitVectorOp(Op, DAG);
6927 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6928 case ISD::STRICT_FADD:
6929 case ISD::STRICT_FSUB:
6930 case ISD::STRICT_FMUL:
6931 case ISD::STRICT_FDIV:
6932 case ISD::STRICT_FSQRT:
6933 case ISD::STRICT_FMA:
6934 if (Op.getValueType() == MVT::nxv32f16 &&
6935 (Subtarget.hasVInstructionsF16Minimal() &&
6936 !Subtarget.hasVInstructionsF16()))
6937 return SplitStrictFPVectorOp(Op, DAG);
6938 return lowerToScalableOp(Op, DAG);
6939 case ISD::STRICT_FSETCC:
6940 case ISD::STRICT_FSETCCS:
6941 return lowerVectorStrictFSetcc(Op, DAG);
6942 case ISD::STRICT_FCEIL:
6943 case ISD::STRICT_FRINT:
6944 case ISD::STRICT_FFLOOR:
6945 case ISD::STRICT_FTRUNC:
6946 case ISD::STRICT_FNEARBYINT:
6947 case ISD::STRICT_FROUND:
6948 case ISD::STRICT_FROUNDEVEN:
6949 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6950 case ISD::MGATHER:
6951 case ISD::VP_GATHER:
6952 return lowerMaskedGather(Op, DAG);
6953 case ISD::MSCATTER:
6954 case ISD::VP_SCATTER:
6955 return lowerMaskedScatter(Op, DAG);
6956 case ISD::GET_ROUNDING:
6957 return lowerGET_ROUNDING(Op, DAG);
6958 case ISD::SET_ROUNDING:
6959 return lowerSET_ROUNDING(Op, DAG);
6960 case ISD::EH_DWARF_CFA:
6961 return lowerEH_DWARF_CFA(Op, DAG);
6962 case ISD::VP_SELECT:
6963 case ISD::VP_MERGE:
6964 case ISD::VP_ADD:
6965 case ISD::VP_SUB:
6966 case ISD::VP_MUL:
6967 case ISD::VP_SDIV:
6968 case ISD::VP_UDIV:
6969 case ISD::VP_SREM:
6970 case ISD::VP_UREM:
6971 case ISD::VP_UADDSAT:
6972 case ISD::VP_USUBSAT:
6973 case ISD::VP_SADDSAT:
6974 case ISD::VP_SSUBSAT:
6975 case ISD::VP_LRINT:
6976 case ISD::VP_LLRINT:
6977 return lowerVPOp(Op, DAG);
6978 case ISD::VP_AND:
6979 case ISD::VP_OR:
6980 case ISD::VP_XOR:
6981 return lowerLogicVPOp(Op, DAG);
6982 case ISD::VP_FADD:
6983 case ISD::VP_FSUB:
6984 case ISD::VP_FMUL:
6985 case ISD::VP_FDIV:
6986 case ISD::VP_FNEG:
6987 case ISD::VP_FABS:
6988 case ISD::VP_SQRT:
6989 case ISD::VP_FMA:
6990 case ISD::VP_FMINNUM:
6991 case ISD::VP_FMAXNUM:
6992 case ISD::VP_FCOPYSIGN:
6993 if (Op.getValueType() == MVT::nxv32f16 &&
6994 (Subtarget.hasVInstructionsF16Minimal() &&
6995 !Subtarget.hasVInstructionsF16()))
6996 return SplitVPOp(Op, DAG);
6997 [[fallthrough]];
6998 case ISD::VP_SRA:
6999 case ISD::VP_SRL:
7000 case ISD::VP_SHL:
7001 return lowerVPOp(Op, DAG);
7002 case ISD::VP_IS_FPCLASS:
7003 return LowerIS_FPCLASS(Op, DAG);
7004 case ISD::VP_SIGN_EXTEND:
7005 case ISD::VP_ZERO_EXTEND:
7006 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7007 return lowerVPExtMaskOp(Op, DAG);
7008 return lowerVPOp(Op, DAG);
7009 case ISD::VP_TRUNCATE:
7010 return lowerVectorTruncLike(Op, DAG);
7011 case ISD::VP_FP_EXTEND:
7012 case ISD::VP_FP_ROUND:
7013 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7014 case ISD::VP_SINT_TO_FP:
7015 case ISD::VP_UINT_TO_FP:
7016 if (Op.getValueType().isVector() &&
7017 Op.getValueType().getScalarType() == MVT::f16 &&
7018 (Subtarget.hasVInstructionsF16Minimal() &&
7019 !Subtarget.hasVInstructionsF16())) {
7020 if (Op.getValueType() == MVT::nxv32f16)
7021 return SplitVPOp(Op, DAG);
7022 // int -> f32
7023 SDLoc DL(Op);
7024 MVT NVT =
7025 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7026 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7027 // f32 -> f16
7028 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7029 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7030 }
7031 [[fallthrough]];
7032 case ISD::VP_FP_TO_SINT:
7033 case ISD::VP_FP_TO_UINT:
7034 if (SDValue Op1 = Op.getOperand(0);
7035 Op1.getValueType().isVector() &&
7036 Op1.getValueType().getScalarType() == MVT::f16 &&
7037 (Subtarget.hasVInstructionsF16Minimal() &&
7038 !Subtarget.hasVInstructionsF16())) {
7039 if (Op1.getValueType() == MVT::nxv32f16)
7040 return SplitVPOp(Op, DAG);
7041 // f16 -> f32
7042 SDLoc DL(Op);
7043 MVT NVT = MVT::getVectorVT(MVT::f32,
7044 Op1.getValueType().getVectorElementCount());
7045 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7046 // f32 -> int
7047 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7048 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7049 }
7050 return lowerVPFPIntConvOp(Op, DAG);
7051 case ISD::VP_SETCC:
7052 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7053 (Subtarget.hasVInstructionsF16Minimal() &&
7054 !Subtarget.hasVInstructionsF16()))
7055 return SplitVPOp(Op, DAG);
7056 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7057 return lowerVPSetCCMaskOp(Op, DAG);
7058 [[fallthrough]];
7059 case ISD::VP_SMIN:
7060 case ISD::VP_SMAX:
7061 case ISD::VP_UMIN:
7062 case ISD::VP_UMAX:
7063 case ISD::VP_BITREVERSE:
7064 case ISD::VP_BSWAP:
7065 return lowerVPOp(Op, DAG);
7066 case ISD::VP_CTLZ:
7067 case ISD::VP_CTLZ_ZERO_UNDEF:
7068 if (Subtarget.hasStdExtZvbb())
7069 return lowerVPOp(Op, DAG);
7070 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7071 case ISD::VP_CTTZ:
7072 case ISD::VP_CTTZ_ZERO_UNDEF:
7073 if (Subtarget.hasStdExtZvbb())
7074 return lowerVPOp(Op, DAG);
7075 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7076 case ISD::VP_CTPOP:
7077 return lowerVPOp(Op, DAG);
7078 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7079 return lowerVPStridedLoad(Op, DAG);
7080 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7081 return lowerVPStridedStore(Op, DAG);
7082 case ISD::VP_FCEIL:
7083 case ISD::VP_FFLOOR:
7084 case ISD::VP_FRINT:
7085 case ISD::VP_FNEARBYINT:
7086 case ISD::VP_FROUND:
7087 case ISD::VP_FROUNDEVEN:
7088 case ISD::VP_FROUNDTOZERO:
7089 if (Op.getValueType() == MVT::nxv32f16 &&
7090 (Subtarget.hasVInstructionsF16Minimal() &&
7091 !Subtarget.hasVInstructionsF16()))
7092 return SplitVPOp(Op, DAG);
7093 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7094 case ISD::VP_FMAXIMUM:
7095 case ISD::VP_FMINIMUM:
7096 if (Op.getValueType() == MVT::nxv32f16 &&
7097 (Subtarget.hasVInstructionsF16Minimal() &&
7098 !Subtarget.hasVInstructionsF16()))
7099 return SplitVPOp(Op, DAG);
7100 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7101 case ISD::EXPERIMENTAL_VP_SPLICE:
7102 return lowerVPSpliceExperimental(Op, DAG);
7103 case ISD::EXPERIMENTAL_VP_REVERSE:
7104 return lowerVPReverseExperimental(Op, DAG);
7105 case ISD::EXPERIMENTAL_VP_SPLAT:
7106 return lowerVPSplatExperimental(Op, DAG);
7107 case ISD::CLEAR_CACHE: {
7108 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7109 "llvm.clear_cache only needs custom lower on Linux targets");
7110 SDLoc DL(Op);
7111 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7112 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7113 Op.getOperand(2), Flags, DL);
7114 }
7115 }
7116}
7117
7118SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7119 SDValue Start, SDValue End,
7120 SDValue Flags, SDLoc DL) const {
7121 MakeLibCallOptions CallOptions;
7122 std::pair<SDValue, SDValue> CallResult =
7123 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7124 {Start, End, Flags}, CallOptions, DL, InChain);
7125
7126 // This function returns void so only the out chain matters.
7127 return CallResult.second;
7128}
7129
7130 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7131 SelectionDAG &DAG, unsigned Flags) {
7132 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7133}
7134
7136 SelectionDAG &DAG, unsigned Flags) {
7137 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7138 Flags);
7139}
7140
7142 SelectionDAG &DAG, unsigned Flags) {
7143 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7144 N->getOffset(), Flags);
7145}
7146
7148 SelectionDAG &DAG, unsigned Flags) {
7149 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7150}
7151
7152template <class NodeTy>
7153SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7154 bool IsLocal, bool IsExternWeak) const {
7155 SDLoc DL(N);
7156 EVT Ty = getPointerTy(DAG.getDataLayout());
7157
7158 // When HWASAN is used and tagging of global variables is enabled
7159 // they should be accessed via the GOT, since the tagged address of a global
7160 // is incompatible with existing code models. This also applies to non-pic
7161 // mode.
7162 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7163 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7164 if (IsLocal && !Subtarget.allowTaggedGlobals())
7165 // Use PC-relative addressing to access the symbol. This generates the
7166 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7167 // %pcrel_lo(auipc)).
7168 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7169
7170 // Use PC-relative addressing to access the GOT for this symbol, then load
7171 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7172 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7173 SDValue Load =
7174 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7175 MachineFunction &MF = DAG.getMachineFunction();
7176 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7177 MachinePointerInfo::getGOT(MF),
7178 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7179 MachineMemOperand::MOInvariant,
7180 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7181 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7182 return Load;
7183 }
7184
7185 switch (getTargetMachine().getCodeModel()) {
7186 default:
7187 report_fatal_error("Unsupported code model for lowering");
7188 case CodeModel::Small: {
7189 // Generate a sequence for accessing addresses within the first 2 GiB of
7190 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7191 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7192 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7193 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7194 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7195 }
7196 case CodeModel::Medium: {
7197 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7198 if (IsExternWeak) {
7199 // An extern weak symbol may be undefined, i.e. have value 0, which may
7200 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7201 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7202 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7203 SDValue Load =
7204 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7205 MachineFunction &MF = DAG.getMachineFunction();
7206 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7207 MachinePointerInfo::getGOT(MF),
7208 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7209 MachineMemOperand::MOInvariant,
7210 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7211 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7212 return Load;
7213 }
7214
7215 // Generate a sequence for accessing addresses within any 2GiB range within
7216 // the address space. This generates the pattern (PseudoLLA sym), which
7217 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7218 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7219 }
7220 }
7221}
7222
7223SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7224 SelectionDAG &DAG) const {
7225 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7226 assert(N->getOffset() == 0 && "unexpected offset in global node");
7227 const GlobalValue *GV = N->getGlobal();
7228 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7229}
7230
7231SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7232 SelectionDAG &DAG) const {
7233 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7234
7235 return getAddr(N, DAG);
7236}
7237
7238SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7239 SelectionDAG &DAG) const {
7240 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7241
7242 return getAddr(N, DAG);
7243}
7244
7245SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7246 SelectionDAG &DAG) const {
7247 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7248
7249 return getAddr(N, DAG);
7250}
7251
7252SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7253 SelectionDAG &DAG,
7254 bool UseGOT) const {
7255 SDLoc DL(N);
7256 EVT Ty = getPointerTy(DAG.getDataLayout());
7257 const GlobalValue *GV = N->getGlobal();
7258 MVT XLenVT = Subtarget.getXLenVT();
7259
7260 if (UseGOT) {
7261 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7262 // load the address from the GOT and add the thread pointer. This generates
7263 // the pattern (PseudoLA_TLS_IE sym), which expands to
7264 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7265 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7266 SDValue Load =
7267 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7268 MachineFunction &MF = DAG.getMachineFunction();
7269 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7270 MachinePointerInfo::getGOT(MF),
7271 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7272 MachineMemOperand::MOInvariant,
7273 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7274 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7275
7276 // Add the thread pointer.
7277 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7278 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7279 }
7280
7281 // Generate a sequence for accessing the address relative to the thread
7282 // pointer, with the appropriate adjustment for the thread pointer offset.
7283 // This generates the pattern
7284 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
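// In assembly this is roughly the standard local-exec sequence:
//   lui   a0, %tprel_hi(sym)
//   add   a0, a0, tp, %tprel_add(sym)
//   addi  a0, a0, %tprel_lo(sym)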
7285 SDValue AddrHi =
7286 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7287 SDValue AddrAdd =
7288 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7289 SDValue AddrLo =
7290 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7291
7292 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7293 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7294 SDValue MNAdd =
7295 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7296 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7297}
7298
7299SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7300 SelectionDAG &DAG) const {
7301 SDLoc DL(N);
7302 EVT Ty = getPointerTy(DAG.getDataLayout());
7303 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7304 const GlobalValue *GV = N->getGlobal();
7305
7306 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7307 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7308 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7309 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7310 SDValue Load =
7311 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7312
7313 // Prepare argument list to generate call.
7314 ArgListTy Args;
7315 ArgListEntry Entry;
7316 Entry.Node = Load;
7317 Entry.Ty = CallTy;
7318 Args.push_back(Entry);
7319
7320 // Set up the call to __tls_get_addr.
7321 TargetLowering::CallLoweringInfo CLI(DAG);
7322 CLI.setDebugLoc(DL)
7323 .setChain(DAG.getEntryNode())
7324 .setLibCallee(CallingConv::C, CallTy,
7325 DAG.getExternalSymbol("__tls_get_addr", Ty),
7326 std::move(Args));
7327
7328 return LowerCallTo(CLI).first;
7329}
7330
7331SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7332 SelectionDAG &DAG) const {
7333 SDLoc DL(N);
7334 EVT Ty = getPointerTy(DAG.getDataLayout());
7335 const GlobalValue *GV = N->getGlobal();
7336
7337 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7338 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7339 //
7340 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7341 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7342 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7343 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7344 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7345 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7346}
7347
7348SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7349 SelectionDAG &DAG) const {
7350 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7351 assert(N->getOffset() == 0 && "unexpected offset in global node");
7352
7353 if (DAG.getTarget().useEmulatedTLS())
7354 return LowerToTLSEmulatedModel(N, DAG);
7355
7356 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7357
7358 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7359 CallingConv::GHC)
7360 report_fatal_error("In GHC calling convention TLS is not supported");
7361
7362 SDValue Addr;
7363 switch (Model) {
7364 case TLSModel::LocalExec:
7365 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7366 break;
7367 case TLSModel::InitialExec:
7368 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7369 break;
7370 case TLSModel::LocalDynamic:
7371 case TLSModel::GeneralDynamic:
7372 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7373 : getDynamicTLSAddr(N, DAG);
7374 break;
7375 }
7376
7377 return Addr;
7378}
7379
7380// Return true if Val is equal to (setcc LHS, RHS, CC).
7381// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7382// Otherwise, return std::nullopt.
7383static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7384 ISD::CondCode CC, SDValue Val) {
7385 assert(Val->getOpcode() == ISD::SETCC);
7386 SDValue LHS2 = Val.getOperand(0);
7387 SDValue RHS2 = Val.getOperand(1);
7388 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7389
7390 if (LHS == LHS2 && RHS == RHS2) {
7391 if (CC == CC2)
7392 return true;
7393 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7394 return false;
7395 } else if (LHS == RHS2 && RHS == LHS2) {
7396 CC2 = ISD::getSetCCSwappedOperands(CC2);
7397 if (CC == CC2)
7398 return true;
7399 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7400 return false;
7401 }
7402
7403 return std::nullopt;
7404}
7405
7406 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7407 const RISCVSubtarget &Subtarget) {
7408 SDValue CondV = N->getOperand(0);
7409 SDValue TrueV = N->getOperand(1);
7410 SDValue FalseV = N->getOperand(2);
7411 MVT VT = N->getSimpleValueType(0);
7412 SDLoc DL(N);
7413
7414 if (!Subtarget.hasConditionalMoveFusion()) {
7415 // (select c, -1, y) -> -c | y
7416 if (isAllOnesConstant(TrueV)) {
7417 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7418 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7419 }
7420 // (select c, y, -1) -> (c-1) | y
7421 if (isAllOnesConstant(FalseV)) {
7422 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7423 DAG.getAllOnesConstant(DL, VT));
7424 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7425 }
7426
7427 // (select c, 0, y) -> (c-1) & y
7428 if (isNullConstant(TrueV)) {
7429 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7430 DAG.getAllOnesConstant(DL, VT));
7431 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7432 }
7433 // (select c, y, 0) -> -c & y
7434 if (isNullConstant(FalseV)) {
7435 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7436 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7437 }
7438 }
7439
7440 // select c, ~x, x --> xor -c, x
7441 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7442 const APInt &TrueVal = TrueV->getAsAPIntVal();
7443 const APInt &FalseVal = FalseV->getAsAPIntVal();
7444 if (~TrueVal == FalseVal) {
7445 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7446 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7447 }
7448 }
7449
7450 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7451 // when both truev and falsev are also setcc.
7452 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7453 FalseV.getOpcode() == ISD::SETCC) {
7454 SDValue LHS = CondV.getOperand(0);
7455 SDValue RHS = CondV.getOperand(1);
7456 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7457
7458 // (select x, x, y) -> x | y
7459 // (select !x, x, y) -> x & y
7460 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7461 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7462 DAG.getFreeze(FalseV));
7463 }
7464 // (select x, y, x) -> x & y
7465 // (select !x, y, x) -> x | y
7466 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7467 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7468 DAG.getFreeze(TrueV), FalseV);
7469 }
7470 }
7471
7472 return SDValue();
7473}
7474
7475// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7476// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7477// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7478// being `0` or `-1`. In such cases we can replace `select` with `and`.
7479// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7480// than `c0`?
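// For example (illustrative constants): in (and (select cond, x, 0xF0), 0x0F)
// the folded false arm is (and 0xF0, 0x0F) = 0, so the expression can become
// (select cond, (and x, 0x0F), 0), which can then be lowered without a branch
// (e.g. via czero or a mask).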
7481static SDValue
7482 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7483 const RISCVSubtarget &Subtarget) {
7484 if (Subtarget.hasShortForwardBranchOpt())
7485 return SDValue();
7486
7487 unsigned SelOpNo = 0;
7488 SDValue Sel = BO->getOperand(0);
7489 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7490 SelOpNo = 1;
7491 Sel = BO->getOperand(1);
7492 }
7493
7494 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7495 return SDValue();
7496
7497 unsigned ConstSelOpNo = 1;
7498 unsigned OtherSelOpNo = 2;
7499 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7500 ConstSelOpNo = 2;
7501 OtherSelOpNo = 1;
7502 }
7503 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7504 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7505 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7506 return SDValue();
7507
7508 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7509 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7510 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7511 return SDValue();
7512
7513 SDLoc DL(Sel);
7514 EVT VT = BO->getValueType(0);
7515
7516 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7517 if (SelOpNo == 1)
7518 std::swap(NewConstOps[0], NewConstOps[1]);
7519
7520 SDValue NewConstOp =
7521 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7522 if (!NewConstOp)
7523 return SDValue();
7524
7525 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7526 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7527 return SDValue();
7528
7529 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7530 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7531 if (SelOpNo == 1)
7532 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7533 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7534
7535 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7536 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7537 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7538}
7539
7540SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7541 SDValue CondV = Op.getOperand(0);
7542 SDValue TrueV = Op.getOperand(1);
7543 SDValue FalseV = Op.getOperand(2);
7544 SDLoc DL(Op);
7545 MVT VT = Op.getSimpleValueType();
7546 MVT XLenVT = Subtarget.getXLenVT();
7547
7548 // Lower vector SELECTs to VSELECTs by splatting the condition.
7549 if (VT.isVector()) {
7550 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7551 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7552 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7553 }
7554
7555 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7556 // nodes to implement the SELECT. Performing the lowering here allows for
7557 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7558 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7559 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7560 VT.isScalarInteger()) {
7561 // (select c, t, 0) -> (czero_eqz t, c)
7562 if (isNullConstant(FalseV))
7563 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7564 // (select c, 0, f) -> (czero_nez f, c)
7565 if (isNullConstant(TrueV))
7566 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7567
7568 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7569 if (TrueV.getOpcode() == ISD::AND &&
7570 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7571 return DAG.getNode(
7572 ISD::OR, DL, VT, TrueV,
7573 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7574 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7575 if (FalseV.getOpcode() == ISD::AND &&
7576 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7577 return DAG.getNode(
7578 ISD::OR, DL, VT, FalseV,
7579 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7580
7581 // Try some other optimizations before falling back to generic lowering.
7582 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7583 return V;
7584
7585 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7586 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
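// For example, (select c, 7, 3) can be emitted as
// (add (czero_eqz (7 - 3), c), 3): czero.eqz passes 4 through when c is
// non-zero and yields 0 when c is zero, so the add produces 7 or 3 without a
// branch.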
7587 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7588 const APInt &TrueVal = TrueV->getAsAPIntVal();
7589 const APInt &FalseVal = FalseV->getAsAPIntVal();
7590 const int TrueValCost = RISCVMatInt::getIntMatCost(
7591 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7592 const int FalseValCost = RISCVMatInt::getIntMatCost(
7593 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7594 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7595 SDValue LHSVal = DAG.getConstant(
7596 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7597 SDValue RHSVal =
7598 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7599 SDValue CMOV =
7600 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7601 DL, VT, LHSVal, CondV);
7602 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7603 }
7604
7605 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7606 // Unless we have the short forward branch optimization.
7607 if (!Subtarget.hasConditionalMoveFusion())
7608 return DAG.getNode(
7609 ISD::OR, DL, VT,
7610 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7611 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7612 }
7613
7614 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7615 return V;
7616
7617 if (Op.hasOneUse()) {
7618 unsigned UseOpc = Op->use_begin()->getOpcode();
7619 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7620 SDNode *BinOp = *Op->use_begin();
7621 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7622 DAG, Subtarget)) {
7623 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7624 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
7625 // may return a constant node and cause crash in lowerSELECT.
7626 if (NewSel.getOpcode() == ISD::SELECT)
7627 return lowerSELECT(NewSel, DAG);
7628 return NewSel;
7629 }
7630 }
7631 }
7632
7633 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7634 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7635 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7636 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7637 if (FPTV && FPFV) {
7638 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7639 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7640 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7641 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7642 DAG.getConstant(1, DL, XLenVT));
7643 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7644 }
7645 }
7646
7647 // If the condition is not an integer SETCC which operates on XLenVT, we need
7648 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7649 // (select condv, truev, falsev)
7650 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7651 if (CondV.getOpcode() != ISD::SETCC ||
7652 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7653 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7654 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7655
7656 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7657
7658 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7659 }
7660
7661 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7662 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7663 // advantage of the integer compare+branch instructions. i.e.:
7664 // (select (setcc lhs, rhs, cc), truev, falsev)
7665 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7666 SDValue LHS = CondV.getOperand(0);
7667 SDValue RHS = CondV.getOperand(1);
7668 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7669
7670 // Special case for a select of 2 constants that have a difference of 1.
7671 // Normally this is done by DAGCombine, but if the select is introduced by
7672 // type legalization or op legalization, we miss it. Restricting to SETLT
7673 // case for now because that is what signed saturating add/sub need.
7674 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7675 // but we would probably want to swap the true/false values if the condition
7676 // is SETGE/SETLE to avoid an XORI.
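// For example, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4):
// the setcc result is 0 or 1, so the add yields 4 or 5 without materializing
// a select at all.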
7677 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7678 CCVal == ISD::SETLT) {
7679 const APInt &TrueVal = TrueV->getAsAPIntVal();
7680 const APInt &FalseVal = FalseV->getAsAPIntVal();
7681 if (TrueVal - 1 == FalseVal)
7682 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7683 if (TrueVal + 1 == FalseVal)
7684 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7685 }
7686
7687 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7688 // 1 < x ? x : 1 -> 0 < x ? x : 1
7689 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7690 RHS == TrueV && LHS == FalseV) {
7691 LHS = DAG.getConstant(0, DL, VT);
7692 // 0 <u x is the same as x != 0.
7693 if (CCVal == ISD::SETULT) {
7694 std::swap(LHS, RHS);
7695 CCVal = ISD::SETNE;
7696 }
7697 }
7698
7699 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7700 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7701 RHS == FalseV) {
7702 RHS = DAG.getConstant(0, DL, VT);
7703 }
7704
7705 SDValue TargetCC = DAG.getCondCode(CCVal);
7706
7707 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7708 // (select (setcc lhs, rhs, CC), constant, falsev)
7709 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7710 std::swap(TrueV, FalseV);
7711 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7712 }
7713
7714 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7715 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7716}
7717
7718SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7719 SDValue CondV = Op.getOperand(1);
7720 SDLoc DL(Op);
7721 MVT XLenVT = Subtarget.getXLenVT();
7722
7723 if (CondV.getOpcode() == ISD::SETCC &&
7724 CondV.getOperand(0).getValueType() == XLenVT) {
7725 SDValue LHS = CondV.getOperand(0);
7726 SDValue RHS = CondV.getOperand(1);
7727 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7728
7729 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7730
7731 SDValue TargetCC = DAG.getCondCode(CCVal);
7732 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7733 LHS, RHS, TargetCC, Op.getOperand(2));
7734 }
7735
7736 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7737 CondV, DAG.getConstant(0, DL, XLenVT),
7738 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7739}
7740
7741SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7742 MachineFunction &MF = DAG.getMachineFunction();
7743 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7744
7745 SDLoc DL(Op);
7746 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7747 getPointerTy(MF.getDataLayout()));
7748
7749 // vastart just stores the address of the VarArgsFrameIndex slot into the
7750 // memory location argument.
7751 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7752 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7753 MachinePointerInfo(SV));
7754}
7755
7756SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7757 SelectionDAG &DAG) const {
7758 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7759 MachineFunction &MF = DAG.getMachineFunction();
7760 MachineFrameInfo &MFI = MF.getFrameInfo();
7761 MFI.setFrameAddressIsTaken(true);
7762 Register FrameReg = RI.getFrameRegister(MF);
7763 int XLenInBytes = Subtarget.getXLen() / 8;
7764
7765 EVT VT = Op.getValueType();
7766 SDLoc DL(Op);
7767 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7768 unsigned Depth = Op.getConstantOperandVal(0);
7769 while (Depth--) {
7770 int Offset = -(XLenInBytes * 2);
7771 SDValue Ptr = DAG.getNode(
7772 ISD::ADD, DL, VT, FrameAddr,
7773 DAG.getSignedConstant(Offset, DL, VT));
7774 FrameAddr =
7775 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7776 }
7777 return FrameAddr;
7778}
7779
7780SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7781 SelectionDAG &DAG) const {
7782 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7783 MachineFunction &MF = DAG.getMachineFunction();
7784 MachineFrameInfo &MFI = MF.getFrameInfo();
7785 MFI.setReturnAddressIsTaken(true);
7786 MVT XLenVT = Subtarget.getXLenVT();
7787 int XLenInBytes = Subtarget.getXLen() / 8;
7788
7789 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7790 return SDValue();
7791
7792 EVT VT = Op.getValueType();
7793 SDLoc DL(Op);
7794 unsigned Depth = Op.getConstantOperandVal(0);
7795 if (Depth) {
7796 int Off = -XLenInBytes;
7797 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7798 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
7799 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7800 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7801 MachinePointerInfo());
7802 }
7803
7804 // Return the value of the return address register, marking it an implicit
7805 // live-in.
7806 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7807 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7808}
7809
7810SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7811 SelectionDAG &DAG) const {
7812 SDLoc DL(Op);
7813 SDValue Lo = Op.getOperand(0);
7814 SDValue Hi = Op.getOperand(1);
7815 SDValue Shamt = Op.getOperand(2);
7816 EVT VT = Lo.getValueType();
7817
7818 // if Shamt-XLEN < 0: // Shamt < XLEN
7819 // Lo = Lo << Shamt
7820 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7821 // else:
7822 // Lo = 0
7823 // Hi = Lo << (Shamt-XLEN)
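// For example, with XLEN=32: a shift amount of 8 takes the first branch,
//   Lo = Lo << 8, Hi = (Hi << 8) | (Lo >> 24)
// while a shift amount of 40 takes the second branch,
//   Lo = 0, Hi = Lo << 8 (the original Lo shifted by 40-32).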
7824
7825 SDValue Zero = DAG.getConstant(0, DL, VT);
7826 SDValue One = DAG.getConstant(1, DL, VT);
7827 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
7828 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7829 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7830 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7831
7832 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7833 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7834 SDValue ShiftRightLo =
7835 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7836 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7837 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7838 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7839
7840 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7841
7842 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7843 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7844
7845 SDValue Parts[2] = {Lo, Hi};
7846 return DAG.getMergeValues(Parts, DL);
7847}
7848
7849SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7850 bool IsSRA) const {
7851 SDLoc DL(Op);
7852 SDValue Lo = Op.getOperand(0);
7853 SDValue Hi = Op.getOperand(1);
7854 SDValue Shamt = Op.getOperand(2);
7855 EVT VT = Lo.getValueType();
7856
7857 // SRA expansion:
7858 // if Shamt-XLEN < 0: // Shamt < XLEN
7859 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7860 // Hi = Hi >>s Shamt
7861 // else:
7862 // Lo = Hi >>s (Shamt-XLEN);
7863 // Hi = Hi >>s (XLEN-1)
7864 //
7865 // SRL expansion:
7866 // if Shamt-XLEN < 0: // Shamt < XLEN
7867 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7868 // Hi = Hi >>u Shamt
7869 // else:
7870 // Lo = Hi >>u (Shamt-XLEN);
7871 // Hi = 0;
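// For example (SRL, XLEN=32): a shift amount of 8 gives
//   Lo = (Lo >> 8) | (Hi << 24), Hi = Hi >> 8
// and a shift amount of 40 gives
//   Lo = Hi >> 8, Hi = 0.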
7872
7873 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7874
7875 SDValue Zero = DAG.getConstant(0, DL, VT);
7876 SDValue One = DAG.getConstant(1, DL, VT);
7877 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
7878 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7879 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7880 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7881
7882 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7883 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7884 SDValue ShiftLeftHi =
7885 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7886 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7887 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7888 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7889 SDValue HiFalse =
7890 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7891
7892 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7893
7894 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7895 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7896
7897 SDValue Parts[2] = {Lo, Hi};
7898 return DAG.getMergeValues(Parts, DL);
7899}
7900
7901// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7902// legal equivalently-sized i8 type, so we can use that as a go-between.
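// For a non-constant scalar %x this produces, roughly:
//   (vXi1 = setcc (splat_vector vXi8 (and %x, 1)), (splat_vector vXi8 0), ne)
// so only the low bit of the scalar decides each mask element.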
7903SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7904 SelectionDAG &DAG) const {
7905 SDLoc DL(Op);
7906 MVT VT = Op.getSimpleValueType();
7907 SDValue SplatVal = Op.getOperand(0);
7908 // All-zeros or all-ones splats are handled specially.
7909 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7910 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7911 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7912 }
7913 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7914 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7915 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7916 }
7917 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7918 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7919 DAG.getConstant(1, DL, SplatVal.getValueType()));
7920 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7921 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7922 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7923}
7924
7925// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7926// illegal (currently only vXi64 RV32).
7927// FIXME: We could also catch non-constant sign-extended i32 values and lower
7928// them to VMV_V_X_VL.
7929SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7930 SelectionDAG &DAG) const {
7931 SDLoc DL(Op);
7932 MVT VecVT = Op.getSimpleValueType();
7933 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7934 "Unexpected SPLAT_VECTOR_PARTS lowering");
7935
7936 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7937 SDValue Lo = Op.getOperand(0);
7938 SDValue Hi = Op.getOperand(1);
7939
7940 MVT ContainerVT = VecVT;
7941 if (VecVT.isFixedLengthVector())
7942 ContainerVT = getContainerForFixedLengthVector(VecVT);
7943
7944 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7945
7946 SDValue Res =
7947 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7948
7949 if (VecVT.isFixedLengthVector())
7950 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7951
7952 return Res;
7953}
7954
7955// Custom-lower extensions from mask vectors by using a vselect either with 1
7956// for zero/any-extension or -1 for sign-extension:
7957// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7958// Note that any-extension is lowered identically to zero-extension.
7959SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7960 int64_t ExtTrueVal) const {
7961 SDLoc DL(Op);
7962 MVT VecVT = Op.getSimpleValueType();
7963 SDValue Src = Op.getOperand(0);
7964 // Only custom-lower extensions from mask types
7965 assert(Src.getValueType().isVector() &&
7966 Src.getValueType().getVectorElementType() == MVT::i1);
7967
7968 if (VecVT.isScalableVector()) {
7969 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7970 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
7971 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7972 }
7973
7974 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7975 MVT I1ContainerVT =
7976 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7977
7978 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7979
7980 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7981
7982 MVT XLenVT = Subtarget.getXLenVT();
7983 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7984 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
7985
7986 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7987 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7988 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7989 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7990 SDValue Select =
7991 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7992 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7993
7994 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7995}
7996
7997SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7998 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7999 MVT ExtVT = Op.getSimpleValueType();
8000 // Only custom-lower extensions from fixed-length vector types.
8001 if (!ExtVT.isFixedLengthVector())
8002 return Op;
8003 MVT VT = Op.getOperand(0).getSimpleValueType();
8004 // Grab the canonical container type for the extended type. Infer the smaller
8005 // type from that to ensure the same number of vector elements, as we know
8006 // the LMUL will be sufficient to hold the smaller type.
8007 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8008 // Get the extended container type manually to ensure the same number of
8009 // vector elements between source and dest.
8010 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8011 ContainerExtVT.getVectorElementCount());
8012
8013 SDValue Op1 =
8014 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8015
8016 SDLoc DL(Op);
8017 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8018
8019 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8020
8021 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8022}
8023
8024// Custom-lower truncations from vectors to mask vectors by using a mask and a
8025// setcc operation:
8026// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8027SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8028 SelectionDAG &DAG) const {
8029 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8030 SDLoc DL(Op);
8031 EVT MaskVT = Op.getValueType();
8032 // Only expect to custom-lower truncations to mask types
8033 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8034 "Unexpected type for vector mask lowering");
8035 SDValue Src = Op.getOperand(0);
8036 MVT VecVT = Src.getSimpleValueType();
8037 SDValue Mask, VL;
8038 if (IsVPTrunc) {
8039 Mask = Op.getOperand(1);
8040 VL = Op.getOperand(2);
8041 }
8042 // If this is a fixed vector, we need to convert it to a scalable vector.
8043 MVT ContainerVT = VecVT;
8044
8045 if (VecVT.isFixedLengthVector()) {
8046 ContainerVT = getContainerForFixedLengthVector(VecVT);
8047 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8048 if (IsVPTrunc) {
8049 MVT MaskContainerVT =
8050 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8051 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8052 }
8053 }
8054
8055 if (!IsVPTrunc) {
8056 std::tie(Mask, VL) =
8057 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8058 }
8059
8060 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8061 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8062
8063 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8064 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8065 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8066 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8067
8068 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8069 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8070 DAG.getUNDEF(ContainerVT), Mask, VL);
8071 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8072 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8073 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8074 if (MaskVT.isFixedLengthVector())
8075 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8076 return Trunc;
8077}
8078
8079SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8080 SelectionDAG &DAG) const {
8081 unsigned Opc = Op.getOpcode();
8082 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8083 SDLoc DL(Op);
8084
8085 MVT VT = Op.getSimpleValueType();
8086 // Only custom-lower vector truncates
8087 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8088
8089 // Truncates to mask types are handled differently
8090 if (VT.getVectorElementType() == MVT::i1)
8091 return lowerVectorMaskTruncLike(Op, DAG);
8092
8093 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8094 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8095 // truncate by one power of two at a time.
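// For example, an i64 -> i8 truncate is emitted as three narrowing steps,
// i64 -> i32 -> i16 -> i8, one truncate node per halving of SEW.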
8096 MVT DstEltVT = VT.getVectorElementType();
8097
8098 SDValue Src = Op.getOperand(0);
8099 MVT SrcVT = Src.getSimpleValueType();
8100 MVT SrcEltVT = SrcVT.getVectorElementType();
8101
8102 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8103 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8104 "Unexpected vector truncate lowering");
8105
8106 MVT ContainerVT = SrcVT;
8107 SDValue Mask, VL;
8108 if (IsVPTrunc) {
8109 Mask = Op.getOperand(1);
8110 VL = Op.getOperand(2);
8111 }
8112 if (SrcVT.isFixedLengthVector()) {
8113 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8114 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8115 if (IsVPTrunc) {
8116 MVT MaskVT = getMaskTypeFor(ContainerVT);
8117 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8118 }
8119 }
8120
8121 SDValue Result = Src;
8122 if (!IsVPTrunc) {
8123 std::tie(Mask, VL) =
8124 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8125 }
8126
8127 unsigned NewOpc;
8128 if (Opc == ISD::TRUNCATE_SSAT_S)
8129 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8130 else if (Opc == ISD::TRUNCATE_USAT_U)
8131 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8132 else
8133 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8134
8135 do {
8136 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8137 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8138 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8139 } while (SrcEltVT != DstEltVT);
8140
8141 if (SrcVT.isFixedLengthVector())
8142 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8143
8144 return Result;
8145}
8146
8147SDValue
8148RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8149 SelectionDAG &DAG) const {
8150 SDLoc DL(Op);
8151 SDValue Chain = Op.getOperand(0);
8152 SDValue Src = Op.getOperand(1);
8153 MVT VT = Op.getSimpleValueType();
8154 MVT SrcVT = Src.getSimpleValueType();
8155 MVT ContainerVT = VT;
8156 if (VT.isFixedLengthVector()) {
8157 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8158 ContainerVT =
8159 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8160 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8161 }
8162
8163 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8164
8165 // RVV can only widen/truncate fp to types double/half the size of the source.
8166 if ((VT.getVectorElementType() == MVT::f64 &&
8167 (SrcVT.getVectorElementType() == MVT::f16 ||
8168 SrcVT.getVectorElementType() == MVT::bf16)) ||
8169 ((VT.getVectorElementType() == MVT::f16 ||
8170 VT.getVectorElementType() == MVT::bf16) &&
8171 SrcVT.getVectorElementType() == MVT::f64)) {
8172 // For double rounding, the intermediate rounding should be round-to-odd.
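// e.g. an f64 -> f16 round is performed as f64 -> f32 (round-to-odd) followed
// by f32 -> f16, so the two-step result matches a single direct rounding.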
8173 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8174 ? RISCVISD::STRICT_FP_EXTEND_VL
8175 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8176 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8177 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8178 Chain, Src, Mask, VL);
8179 Chain = Src.getValue(1);
8180 }
8181
8182 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8183 ? RISCVISD::STRICT_FP_EXTEND_VL
8184 : RISCVISD::STRICT_FP_ROUND_VL;
8185 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8186 Chain, Src, Mask, VL);
8187 if (VT.isFixedLengthVector()) {
8188 // StrictFP operations have two result values. Their lowered result should
8189 // have the same result count.
8190 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8191 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8192 }
8193 return Res;
8194}
8195
8196SDValue
8197RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8198 SelectionDAG &DAG) const {
8199 bool IsVP =
8200 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8201 bool IsExtend =
8202 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8203 // RVV can only truncate fp to types half the size of the source. We
8204 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8205 // conversion instruction.
8206 SDLoc DL(Op);
8207 MVT VT = Op.getSimpleValueType();
8208
8209 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8210
8211 SDValue Src = Op.getOperand(0);
8212 MVT SrcVT = Src.getSimpleValueType();
8213
8214 bool IsDirectExtend =
8215 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8216 (SrcVT.getVectorElementType() != MVT::f16 &&
8217 SrcVT.getVectorElementType() != MVT::bf16));
8218 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8219 VT.getVectorElementType() != MVT::bf16) ||
8220 SrcVT.getVectorElementType() != MVT::f64);
8221
8222 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8223
8224 // Prepare any fixed-length vector operands.
8225 MVT ContainerVT = VT;
8226 SDValue Mask, VL;
8227 if (IsVP) {
8228 Mask = Op.getOperand(1);
8229 VL = Op.getOperand(2);
8230 }
8231 if (VT.isFixedLengthVector()) {
8232 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8233 ContainerVT =
8234 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8235 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8236 if (IsVP) {
8237 MVT MaskVT = getMaskTypeFor(ContainerVT);
8238 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8239 }
8240 }
8241
8242 if (!IsVP)
8243 std::tie(Mask, VL) =
8244 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8245
8246 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8247
8248 if (IsDirectConv) {
8249 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8250 if (VT.isFixedLengthVector())
8251 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8252 return Src;
8253 }
8254
8255 unsigned InterConvOpc =
8256 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8257
8258 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8259 SDValue IntermediateConv =
8260 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8261 SDValue Result =
8262 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8263 if (VT.isFixedLengthVector())
8264 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8265 return Result;
8266}
8267
8268// Given a scalable vector type and an index into it, returns the type for the
8269// smallest subvector that the index fits in. This can be used to reduce LMUL
8270// for operations like vslidedown.
8271//
8272// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
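// A short worked example of the computation below (assuming
// RVVBitsPerBlock == 64): for nxv4i32 with Zvl128b, EltSize = 32 and
// VectorBitsMin = 128, so MinVLMAX = 4. Any index below 4 yields the LMUL1
// type nxv2i32; an index of 5 would select the doubled type nxv4i32, which
// the final bitsGT check rejects because it is not strictly smaller than
// VecVT.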
8273static std::optional<MVT>
8274getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8275 const RISCVSubtarget &Subtarget) {
8276 assert(VecVT.isScalableVector());
8277 const unsigned EltSize = VecVT.getScalarSizeInBits();
8278 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8279 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8280 MVT SmallerVT;
8281 if (MaxIdx < MinVLMAX)
8282 SmallerVT = getLMUL1VT(VecVT);
8283 else if (MaxIdx < MinVLMAX * 2)
8284 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8285 else if (MaxIdx < MinVLMAX * 4)
8286 SmallerVT = getLMUL1VT(VecVT)
8287 .getDoubleNumVectorElementsVT()
8288 .getDoubleNumVectorElementsVT();
8289 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8290 return std::nullopt;
8291 return SmallerVT;
8292}
8293
8294// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8295// first position of a vector, and that vector is slid up to the insert index.
8296// By limiting the active vector length to index+1 and merging with the
8297// original vector (with an undisturbed tail policy for elements >= VL), we
8298// achieve the desired result of leaving all elements untouched except the one
8299// at VL-1, which is replaced with the desired value.
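//
// Illustrative sketch only (register choices and vtype depend on VLEN and
// the container type): inserting a scalar held in a0 at constant index 2 of
// a v4i32 value conceptually becomes
//   vsetivli zero, 3, e32, m1, tu, ma  ; VL = idx + 1, tail undisturbed
//   vmv.s.x  v9, a0                    ; scalar into element 0
//   vslideup.vi v8, v9, 2              ; slide it up to index 2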
8300SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8301 SelectionDAG &DAG) const {
8302 SDLoc DL(Op);
8303 MVT VecVT = Op.getSimpleValueType();
8304 SDValue Vec = Op.getOperand(0);
8305 SDValue Val = Op.getOperand(1);
8306 SDValue Idx = Op.getOperand(2);
8307
8308 if (VecVT.getVectorElementType() == MVT::i1) {
8309 // FIXME: For now we just promote to an i8 vector and insert into that,
8310 // but this is probably not optimal.
8311 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8312 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8313 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8314 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8315 }
8316
8317 MVT ContainerVT = VecVT;
8318 // If the operand is a fixed-length vector, convert to a scalable one.
8319 if (VecVT.isFixedLengthVector()) {
8320 ContainerVT = getContainerForFixedLengthVector(VecVT);
8321 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8322 }
8323
8324 // If we know the index we're going to insert at, we can shrink Vec so that
8325 // we're performing the scalar inserts and slideup on a smaller LMUL.
8326 MVT OrigContainerVT = ContainerVT;
8327 SDValue OrigVec = Vec;
8328 SDValue AlignedIdx;
8329 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8330 const unsigned OrigIdx = IdxC->getZExtValue();
8331 // Do we know an upper bound on LMUL?
8332 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8333 DL, DAG, Subtarget)) {
8334 ContainerVT = *ShrunkVT;
8335 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8336 }
8337
8338 // If we're compiling for an exact VLEN value, we can always perform
8339 // the insert in m1 as we can determine the register corresponding to
8340 // the index in the register group.
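// (Worked example, assuming VLEN = 128: for a v16i32 source in an m4
// register group, ElemsPerVReg = 128 / 32 = 4, so element 9 lives in the
// third register of the group with SubRegIdx = 9 / 4 = 2 and
// RemIdx = 9 % 4 = 1.)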
8341 const MVT M1VT = getLMUL1VT(ContainerVT);
8342 if (auto VLEN = Subtarget.getRealVLen();
8343 VLEN && ContainerVT.bitsGT(M1VT)) {
8344 EVT ElemVT = VecVT.getVectorElementType();
8345 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8346 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8347 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8348 unsigned ExtractIdx =
8349 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8350 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8351 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8352 ContainerVT = M1VT;
8353 }
8354
8355 if (AlignedIdx)
8356 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8357 AlignedIdx);
8358 }
8359
8360 MVT XLenVT = Subtarget.getXLenVT();
8361
8362 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8363 // Even i64-element vectors on RV32 can be lowered without scalar
8364 // legalization if the most-significant 32 bits of the value are not affected
8365 // by the sign-extension of the lower 32 bits.
8366 // TODO: We could also catch sign extensions of a 32-bit value.
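// (For example, an i64 constant such as -5 is a sign-extended 32-bit
// value, so on RV32 it can be inserted with a single vmv.s.x of its low
// 32 bits; vmv.s.x sign-extends the scalar when SEW > XLEN.)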
8367 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8368 const auto *CVal = cast<ConstantSDNode>(Val);
8369 if (isInt<32>(CVal->getSExtValue())) {
8370 IsLegalInsert = true;
8371 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
8372 }
8373 }
8374
8375 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8376
8377 SDValue ValInVec;
8378
8379 if (IsLegalInsert) {
8380 unsigned Opc =
8381 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8382 if (isNullConstant(Idx)) {
8383 if (!VecVT.isFloatingPoint())
8384 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8385 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8386
8387 if (AlignedIdx)
8388 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8389 Vec, AlignedIdx);
8390 if (!VecVT.isFixedLengthVector())
8391 return Vec;
8392 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8393 }
8394 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8395 } else {
8396 // On RV32, i64-element vectors must be specially handled to place the
8397 // value at element 0, by using two vslide1down instructions in sequence on
8398 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8399 // this.
8400 SDValue ValLo, ValHi;
8401 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8402 MVT I32ContainerVT =
8403 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8404 SDValue I32Mask =
8405 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8406 // Limit the active VL to two.
8407 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8408 // If the Idx is 0 we can insert directly into the vector.
8409 if (isNullConstant(Idx)) {
8410 // First slide in the lo value, then the hi in above it. We use slide1down
8411 // to avoid the register group overlap constraint of vslide1up.
8412 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8413 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8414 // If the source vector is undef don't pass along the tail elements from
8415 // the previous slide1down.
8416 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8417 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8418 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8419 // Bitcast back to the right container type.
8420 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8421
8422 if (AlignedIdx)
8423 ValInVec =
8424 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8425 ValInVec, AlignedIdx);
8426 if (!VecVT.isFixedLengthVector())
8427 return ValInVec;
8428 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8429 }
8430
8431 // First slide in the lo value, then the hi in above it. We use slide1down
8432 // to avoid the register group overlap constraint of vslide1up.
8433 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8434 DAG.getUNDEF(I32ContainerVT),
8435 DAG.getUNDEF(I32ContainerVT), ValLo,
8436 I32Mask, InsertI64VL);
8437 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8438 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8439 I32Mask, InsertI64VL);
8440 // Bitcast back to the right container type.
8441 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8442 }
8443
8444 // Now that the value is in a vector, slide it into position.
8445 SDValue InsertVL =
8446 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8447
8448 // Use tail agnostic policy if Idx is the last index of Vec.
8449 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8450 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8451 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8452 Policy = RISCVII::TAIL_AGNOSTIC;
8453 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8454 Idx, Mask, InsertVL, Policy);
8455
8456 if (AlignedIdx)
8457 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8458 Slideup, AlignedIdx);
8459 if (!VecVT.isFixedLengthVector())
8460 return Slideup;
8461 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8462}
8463
8464// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8465// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8466// types this is done using VMV_X_S to allow us to glean information about the
8467// sign bits of the result.
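//
// Illustrative sketch only (exact vtype depends on the source type):
// extracting element 2 of a v4i32 value roughly lowers to
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vi v9, v8, 2
//   vmv.x.s a0, v9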
8468SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8469 SelectionDAG &DAG) const {
8470 SDLoc DL(Op);
8471 SDValue Idx = Op.getOperand(1);
8472 SDValue Vec = Op.getOperand(0);
8473 EVT EltVT = Op.getValueType();
8474 MVT VecVT = Vec.getSimpleValueType();
8475 MVT XLenVT = Subtarget.getXLenVT();
8476
8477 if (VecVT.getVectorElementType() == MVT::i1) {
8478 // Use vfirst.m to extract the first bit.
8479 if (isNullConstant(Idx)) {
8480 MVT ContainerVT = VecVT;
8481 if (VecVT.isFixedLengthVector()) {
8482 ContainerVT = getContainerForFixedLengthVector(VecVT);
8483 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8484 }
8485 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8486 SDValue Vfirst =
8487 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8488 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8489 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8490 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8491 }
8492 if (VecVT.isFixedLengthVector()) {
8493 unsigned NumElts = VecVT.getVectorNumElements();
8494 if (NumElts >= 8) {
8495 MVT WideEltVT;
8496 unsigned WidenVecLen;
8497 SDValue ExtractElementIdx;
8498 SDValue ExtractBitIdx;
8499 unsigned MaxEEW = Subtarget.getELen();
8500 MVT LargestEltVT = MVT::getIntegerVT(
8501 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8502 if (NumElts <= LargestEltVT.getSizeInBits()) {
8503 assert(isPowerOf2_32(NumElts) &&
8504 "the number of elements should be power of 2");
8505 WideEltVT = MVT::getIntegerVT(NumElts);
8506 WidenVecLen = 1;
8507 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8508 ExtractBitIdx = Idx;
8509 } else {
8510 WideEltVT = LargestEltVT;
8511 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8512 // extract element index = index / element width
8513 ExtractElementIdx = DAG.getNode(
8514 ISD::SRL, DL, XLenVT, Idx,
8515 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8516 // mask bit index = index % element width
8517 ExtractBitIdx = DAG.getNode(
8518 ISD::AND, DL, XLenVT, Idx,
8519 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8520 }
8521 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8522 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8523 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8524 Vec, ExtractElementIdx);
8525 // Extract the bit from GPR.
8526 SDValue ShiftRight =
8527 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8528 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8529 DAG.getConstant(1, DL, XLenVT));
8530 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8531 }
8532 }
8533 // Otherwise, promote to an i8 vector and extract from that.
8534 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8535 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8536 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8537 }
8538
8539 // If this is a fixed vector, we need to convert it to a scalable vector.
8540 MVT ContainerVT = VecVT;
8541 if (VecVT.isFixedLengthVector()) {
8542 ContainerVT = getContainerForFixedLengthVector(VecVT);
8543 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8544 }
8545
8546 // If we're compiling for an exact VLEN value and we have a known
8547 // constant index, we can always perform the extract in m1 (or
8548 // smaller) as we can determine the register corresponding to
8549 // the index in the register group.
8550 const auto VLen = Subtarget.getRealVLen();
8551 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8552 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8553 MVT M1VT = getLMUL1VT(ContainerVT);
8554 unsigned OrigIdx = IdxC->getZExtValue();
8555 EVT ElemVT = VecVT.getVectorElementType();
8556 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8557 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8558 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8559 unsigned ExtractIdx =
8560 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8561 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8562 DAG.getVectorIdxConstant(ExtractIdx, DL));
8563 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8564 ContainerVT = M1VT;
8565 }
8566
8567 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8568 // contains our index.
8569 std::optional<uint64_t> MaxIdx;
8570 if (VecVT.isFixedLengthVector())
8571 MaxIdx = VecVT.getVectorNumElements() - 1;
8572 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8573 MaxIdx = IdxC->getZExtValue();
8574 if (MaxIdx) {
8575 if (auto SmallerVT =
8576 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8577 ContainerVT = *SmallerVT;
8578 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8579 DAG.getConstant(0, DL, XLenVT));
8580 }
8581 }
8582
8583 // If after narrowing, the required slide is still greater than LMUL2,
8584 // fallback to generic expansion and go through the stack. This is done
8585 // for a subtle reason: extracting *all* elements out of a vector is
8586 // widely expected to be linear in vector size, but because vslidedown
8587 // is linear in LMUL, performing N extracts using vslidedown becomes
8588 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8589 // seems to have the same problem (the store is linear in LMUL), but the
8590 // generic expansion *memoizes* the store, and thus for many extracts of
8591 // the same vector we end up with one store and a bunch of loads.
8592 // TODO: We don't have the same code for insert_vector_elt because we
8593 // have BUILD_VECTOR and handle the degenerate case there. Should we
8594 // consider adding an inverse BUILD_VECTOR node?
8595 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8596 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8597 return SDValue();
8598
8599 // If the index is 0, the vector is already in the right position.
8600 if (!isNullConstant(Idx)) {
8601 // Use a VL of 1 to avoid processing more elements than we need.
8602 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8603 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8604 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8605 }
8606
8607 if (!EltVT.isInteger()) {
8608 // Floating-point extracts are handled in TableGen.
8609 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8610 DAG.getVectorIdxConstant(0, DL));
8611 }
8612
8613 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8614 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8615}
8616
8617// Some RVV intrinsics may claim that they want an integer operand to be
8618// promoted or expanded.
8619 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8620 const RISCVSubtarget &Subtarget) {
8621 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8622 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8623 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8624 "Unexpected opcode");
8625
8626 if (!Subtarget.hasVInstructions())
8627 return SDValue();
8628
8629 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8630 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8631 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8632
8633 SDLoc DL(Op);
8634
8635 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8636 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8637 if (!II || !II->hasScalarOperand())
8638 return SDValue();
8639
8640 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8641 assert(SplatOp < Op.getNumOperands());
8642
8644 SDValue &ScalarOp = Operands[SplatOp];
8645 MVT OpVT = ScalarOp.getSimpleValueType();
8646 MVT XLenVT = Subtarget.getXLenVT();
8647
8648 // If this isn't a scalar, or its type is XLenVT we're done.
8649 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8650 return SDValue();
8651
8652 // Simplest case is that the operand needs to be promoted to XLenVT.
8653 if (OpVT.bitsLT(XLenVT)) {
8654 // If the operand is a constant, sign extend to increase our chances
8655 // of being able to use a .vi instruction. ANY_EXTEND would become a
8656 // zero extend and the simm5 check in isel would fail.
8657 // FIXME: Should we ignore the upper bits in isel instead?
8658 unsigned ExtOpc =
8659 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8660 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8661 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8662 }
8663
8664 // Use the previous operand to get the vXi64 VT. The result might be a mask
8665 // VT for compares. Using the previous operand assumes that the previous
8666 // operand will never have a smaller element size than a scalar operand and
8667 // that a widening operation never uses SEW=64.
8668 // NOTE: If this fails the below assert, we can probably just find the
8669 // element count from any operand or result and use it to construct the VT.
8670 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8671 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8672
8673 // The more complex case is when the scalar is larger than XLenVT.
8674 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8675 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8676
8677 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8678 // instruction to sign-extend since SEW>XLEN.
8679 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8680 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8681 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8682 }
8683
8684 switch (IntNo) {
8685 case Intrinsic::riscv_vslide1up:
8686 case Intrinsic::riscv_vslide1down:
8687 case Intrinsic::riscv_vslide1up_mask:
8688 case Intrinsic::riscv_vslide1down_mask: {
8689 // We need to special case these when the scalar is larger than XLen.
8690 unsigned NumOps = Op.getNumOperands();
8691 bool IsMasked = NumOps == 7;
8692
8693 // Convert the vector source to the equivalent nxvXi32 vector.
8694 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8695 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8696 SDValue ScalarLo, ScalarHi;
8697 std::tie(ScalarLo, ScalarHi) =
8698 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8699
8700 // Double the VL since we halved SEW.
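// (For example, an nxv2i64 source viewed as nxv4i32 needs 2*N i32 elements
// to cover an AVL of N i64 elements.)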
8701 SDValue AVL = getVLOperand(Op);
8702 SDValue I32VL;
8703
8704 // Optimize for constant AVL
8705 if (isa<ConstantSDNode>(AVL)) {
8706 const auto [MinVLMAX, MaxVLMAX] =
8707 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8708
8709 uint64_t AVLInt = AVL->getAsZExtVal();
8710 if (AVLInt <= MinVLMAX) {
8711 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8712 } else if (AVLInt >= 2 * MaxVLMAX) {
8713 // Just set vl to VLMAX in this situation
8714 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
8715 } else {
8716 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8717 // is related to the hardware implementation.
8718 // So let the following code handle it.
8719 }
8720 }
8721 if (!I32VL) {
8722 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8723 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8724 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8725 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8726 SDValue SETVL =
8727 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8728 // Use a vsetvli instruction to get the actually-used length, which
8729 // depends on the hardware implementation.
8730 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8731 SEW, LMUL);
8732 I32VL =
8733 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8734 }
8735
8736 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8737
8738 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8739 // instructions.
8740 SDValue Passthru;
8741 if (IsMasked)
8742 Passthru = DAG.getUNDEF(I32VT);
8743 else
8744 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8745
8746 if (IntNo == Intrinsic::riscv_vslide1up ||
8747 IntNo == Intrinsic::riscv_vslide1up_mask) {
8748 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8749 ScalarHi, I32Mask, I32VL);
8750 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8751 ScalarLo, I32Mask, I32VL);
8752 } else {
8753 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8754 ScalarLo, I32Mask, I32VL);
8755 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8756 ScalarHi, I32Mask, I32VL);
8757 }
8758
8759 // Convert back to nxvXi64.
8760 Vec = DAG.getBitcast(VT, Vec);
8761
8762 if (!IsMasked)
8763 return Vec;
8764 // Apply mask after the operation.
8765 SDValue Mask = Operands[NumOps - 3];
8766 SDValue MaskedOff = Operands[1];
8767 // Assume Policy operand is the last operand.
8768 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8769 // We don't need to select maskedoff if it's undef.
8770 if (MaskedOff.isUndef())
8771 return Vec;
8772 // TAMU
8773 if (Policy == RISCVII::TAIL_AGNOSTIC)
8774 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8775 DAG.getUNDEF(VT), AVL);
8776 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8777 // It's fine because vmerge does not care about the mask policy.
8778 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8779 MaskedOff, AVL);
8780 }
8781 }
8782
8783 // We need to convert the scalar to a splat vector.
8784 SDValue VL = getVLOperand(Op);
8785 assert(VL.getValueType() == XLenVT);
8786 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8787 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8788}
8789
8790// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8791// scalable vector llvm.get.vector.length for now.
8792//
8793// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8794// (vscale * VF). The vscale and VF are independent of element width. We use
8795// SEW=8 for the vsetvli because it is the only element width that supports all
8796 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8797// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
8798// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8799// SEW and LMUL are better for the surrounding vector instructions.
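//
// Worked example (assuming RVVBitsPerBlock == 64): for VF = 2, LMul1VF is
// 64 / 8 = 8, so VF is fractional and LMulVal = 8 / 2 = 4, i.e. LMUL = mf4.
// With SEW = 8 that gives VLMax = (VLEN / 8) / 4 = VLEN / 32, which equals
// vscale * VF = (VLEN / 64) * 2 as intended.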
8800 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8801 const RISCVSubtarget &Subtarget) {
8802 MVT XLenVT = Subtarget.getXLenVT();
8803
8804 // The smallest LMUL is only valid for the smallest element width.
8805 const unsigned ElementWidth = 8;
8806
8807 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8808 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8809 // We don't support VF==1 with ELEN==32.
8810 [[maybe_unused]] unsigned MinVF =
8811 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8812
8813 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8814 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8815 "Unexpected VF");
8816
8817 bool Fractional = VF < LMul1VF;
8818 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8819 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8820 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8821
8822 SDLoc DL(N);
8823
8824 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8825 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8826
8827 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8828
8829 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8830 SDValue Res =
8831 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8832 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8833}
8834
8835 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8836 const RISCVSubtarget &Subtarget) {
8837 SDValue Op0 = N->getOperand(1);
8838 MVT OpVT = Op0.getSimpleValueType();
8839 MVT ContainerVT = OpVT;
8840 if (OpVT.isFixedLengthVector()) {
8841 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8842 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8843 }
8844 MVT XLenVT = Subtarget.getXLenVT();
8845 SDLoc DL(N);
8846 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8847 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8848 if (isOneConstant(N->getOperand(2)))
8849 return Res;
8850
8851 // Convert -1 to VL.
8852 SDValue Setcc =
8853 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8854 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8855 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8856}
8857
8858 static inline void promoteVCIXScalar(const SDValue &Op,
8859 SmallVectorImpl<SDValue> &Operands,
8860 SelectionDAG &DAG) {
8861 const RISCVSubtarget &Subtarget =
8862 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8863
8864 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8865 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8866 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8867 SDLoc DL(Op);
8868
8869 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8870 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8871 if (!II || !II->hasScalarOperand())
8872 return;
8873
8874 unsigned SplatOp = II->ScalarOperand + 1;
8875 assert(SplatOp < Op.getNumOperands());
8876
8877 SDValue &ScalarOp = Operands[SplatOp];
8878 MVT OpVT = ScalarOp.getSimpleValueType();
8879 MVT XLenVT = Subtarget.getXLenVT();
8880
8881 // The code below is partially copied from lowerVectorIntrinsicScalars.
8882 // If this isn't a scalar, or its type is XLenVT we're done.
8883 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8884 return;
8885
8886 // Manually emit promote operation for scalar operation.
8887 if (OpVT.bitsLT(XLenVT)) {
8888 unsigned ExtOpc =
8889 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8890 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8891 }
8892
8893 return;
8894}
8895
8896 static void processVCIXOperands(SDValue &OrigOp,
8897 SmallVectorImpl<SDValue> &Operands,
8898 SelectionDAG &DAG) {
8899 promoteVCIXScalar(OrigOp, Operands, DAG);
8900 const RISCVSubtarget &Subtarget =
8901 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8902 for (SDValue &V : Operands) {
8903 EVT ValType = V.getValueType();
8904 if (ValType.isVector() && ValType.isFloatingPoint()) {
8905 MVT InterimIVT =
8906 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8907 ValType.getVectorElementCount());
8908 V = DAG.getBitcast(InterimIVT, V);
8909 }
8910 if (ValType.isFixedLengthVector()) {
8911 MVT OpContainerVT = getContainerForFixedLengthVector(
8912 DAG, V.getSimpleValueType(), Subtarget);
8913 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8914 }
8915 }
8916}
8917
8918// LMUL * VLEN should be greater than or equal to EGS * SEW
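// For example, with a minimum VLEN of 128, an element group of EGS = 4 with
// SEW = 32 needs LMUL * VLEN >= 128 bits, which an LMUL1 i32 type already
// satisfies; with SEW = 64 the same group needs 256 bits, so LMUL1 at
// VLEN = 128 would be rejected.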
8919static inline bool isValidEGW(int EGS, EVT VT,
8920 const RISCVSubtarget &Subtarget) {
8921 return (Subtarget.getRealMinVLen() *
8922 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8923 EGS * VT.getScalarSizeInBits();
8924}
8925
8926SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8927 SelectionDAG &DAG) const {
8928 unsigned IntNo = Op.getConstantOperandVal(0);
8929 SDLoc DL(Op);
8930 MVT XLenVT = Subtarget.getXLenVT();
8931
8932 switch (IntNo) {
8933 default:
8934 break; // Don't custom lower most intrinsics.
8935 case Intrinsic::thread_pointer: {
8936 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8937 return DAG.getRegister(RISCV::X4, PtrVT);
8938 }
8939 case Intrinsic::riscv_orc_b:
8940 case Intrinsic::riscv_brev8:
8941 case Intrinsic::riscv_sha256sig0:
8942 case Intrinsic::riscv_sha256sig1:
8943 case Intrinsic::riscv_sha256sum0:
8944 case Intrinsic::riscv_sha256sum1:
8945 case Intrinsic::riscv_sm3p0:
8946 case Intrinsic::riscv_sm3p1: {
8947 unsigned Opc;
8948 switch (IntNo) {
8949 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8950 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8951 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8952 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8953 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8954 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8955 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8956 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8957 }
8958
8959 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8960 }
8961 case Intrinsic::riscv_sm4ks:
8962 case Intrinsic::riscv_sm4ed: {
8963 unsigned Opc =
8964 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8965
8966 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8967 Op.getOperand(3));
8968 }
8969 case Intrinsic::riscv_zip:
8970 case Intrinsic::riscv_unzip: {
8971 unsigned Opc =
8972 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8973 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8974 }
8975 case Intrinsic::riscv_mopr:
8976 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8977 Op.getOperand(2));
8978
8979 case Intrinsic::riscv_moprr: {
8980 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8981 Op.getOperand(2), Op.getOperand(3));
8982 }
8983 case Intrinsic::riscv_clmul:
8984 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8985 Op.getOperand(2));
8986 case Intrinsic::riscv_clmulh:
8987 case Intrinsic::riscv_clmulr: {
8988 unsigned Opc =
8989 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8990 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8991 }
8992 case Intrinsic::experimental_get_vector_length:
8993 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8994 case Intrinsic::experimental_cttz_elts:
8995 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
8996 case Intrinsic::riscv_vmv_x_s: {
8997 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8998 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8999 }
9000 case Intrinsic::riscv_vfmv_f_s:
9001 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9002 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9003 case Intrinsic::riscv_vmv_v_x:
9004 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9005 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9006 Subtarget);
9007 case Intrinsic::riscv_vfmv_v_f:
9008 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9009 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9010 case Intrinsic::riscv_vmv_s_x: {
9011 SDValue Scalar = Op.getOperand(2);
9012
9013 if (Scalar.getValueType().bitsLE(XLenVT)) {
9014 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9015 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9016 Op.getOperand(1), Scalar, Op.getOperand(3));
9017 }
9018
9019 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9020
9021 // This is an i64 value that lives in two scalar registers. We have to
9022 // insert this in a convoluted way. First we build vXi64 splat containing
9023 // the two values that we assemble using some bit math. Next we'll use
9024 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9025 // to merge element 0 from our splat into the source vector.
9026 // FIXME: This is probably not the best way to do this, but it is
9027 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9028 // point.
9029 // sw lo, (a0)
9030 // sw hi, 4(a0)
9031 // vlse vX, (a0)
9032 //
9033 // vid.v vVid
9034 // vmseq.vx mMask, vVid, 0
9035 // vmerge.vvm vDest, vSrc, vVal, mMask
9036 MVT VT = Op.getSimpleValueType();
9037 SDValue Vec = Op.getOperand(1);
9038 SDValue VL = getVLOperand(Op);
9039
9040 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9041 if (Op.getOperand(1).isUndef())
9042 return SplattedVal;
9043 SDValue SplattedIdx =
9044 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9045 DAG.getConstant(0, DL, MVT::i32), VL);
9046
9047 MVT MaskVT = getMaskTypeFor(VT);
9048 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9049 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9050 SDValue SelectCond =
9051 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9052 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9053 DAG.getUNDEF(MaskVT), Mask, VL});
9054 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9055 Vec, DAG.getUNDEF(VT), VL);
9056 }
9057 case Intrinsic::riscv_vfmv_s_f:
9058 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9059 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9060 // EGS * EEW >= 128 bits
9061 case Intrinsic::riscv_vaesdf_vv:
9062 case Intrinsic::riscv_vaesdf_vs:
9063 case Intrinsic::riscv_vaesdm_vv:
9064 case Intrinsic::riscv_vaesdm_vs:
9065 case Intrinsic::riscv_vaesef_vv:
9066 case Intrinsic::riscv_vaesef_vs:
9067 case Intrinsic::riscv_vaesem_vv:
9068 case Intrinsic::riscv_vaesem_vs:
9069 case Intrinsic::riscv_vaeskf1:
9070 case Intrinsic::riscv_vaeskf2:
9071 case Intrinsic::riscv_vaesz_vs:
9072 case Intrinsic::riscv_vsm4k:
9073 case Intrinsic::riscv_vsm4r_vv:
9074 case Intrinsic::riscv_vsm4r_vs: {
9075 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9076 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9077 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9078 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9079 return Op;
9080 }
9081 // EGS * EEW >= 256 bits
9082 case Intrinsic::riscv_vsm3c:
9083 case Intrinsic::riscv_vsm3me: {
9084 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9085 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9086 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9087 return Op;
9088 }
9089 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9090 case Intrinsic::riscv_vsha2ch:
9091 case Intrinsic::riscv_vsha2cl:
9092 case Intrinsic::riscv_vsha2ms: {
9093 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9094 !Subtarget.hasStdExtZvknhb())
9095 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9096 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9097 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9098 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9099 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9100 return Op;
9101 }
9102 case Intrinsic::riscv_sf_vc_v_x:
9103 case Intrinsic::riscv_sf_vc_v_i:
9104 case Intrinsic::riscv_sf_vc_v_xv:
9105 case Intrinsic::riscv_sf_vc_v_iv:
9106 case Intrinsic::riscv_sf_vc_v_vv:
9107 case Intrinsic::riscv_sf_vc_v_fv:
9108 case Intrinsic::riscv_sf_vc_v_xvv:
9109 case Intrinsic::riscv_sf_vc_v_ivv:
9110 case Intrinsic::riscv_sf_vc_v_vvv:
9111 case Intrinsic::riscv_sf_vc_v_fvv:
9112 case Intrinsic::riscv_sf_vc_v_xvw:
9113 case Intrinsic::riscv_sf_vc_v_ivw:
9114 case Intrinsic::riscv_sf_vc_v_vvw:
9115 case Intrinsic::riscv_sf_vc_v_fvw: {
9116 MVT VT = Op.getSimpleValueType();
9117
9118 SmallVector<SDValue> Operands{Op->op_values()};
9119 processVCIXOperands(Op, Operands, DAG);
9120
9121 MVT RetVT = VT;
9122 if (VT.isFixedLengthVector())
9123 RetVT = getContainerForFixedLengthVector(RetVT);
9124 else if (VT.isFloatingPoint())
9125 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9126 VT.getVectorElementCount());
9127
9128 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9129
9130 if (VT.isFixedLengthVector())
9131 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9132 else if (VT.isFloatingPoint())
9133 NewNode = DAG.getBitcast(VT, NewNode);
9134
9135 if (Op == NewNode)
9136 break;
9137
9138 return NewNode;
9139 }
9140 }
9141
9142 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9143}
9144
9145 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9146 unsigned Type) {
9147 SDLoc DL(Op);
9148 SmallVector<SDValue> Operands{Op->op_values()};
9149 Operands.erase(Operands.begin() + 1);
9150
9151 const RISCVSubtarget &Subtarget =
9152 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9153 MVT VT = Op.getSimpleValueType();
9154 MVT RetVT = VT;
9155 MVT FloatVT = VT;
9156
9157 if (VT.isFloatingPoint()) {
9158 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9159 VT.getVectorElementCount());
9160 FloatVT = RetVT;
9161 }
9162 if (VT.isFixedLengthVector())
9163 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9164 Subtarget);
9165
9166 processVCIXOperands(Op, Operands, DAG);
9167
9168 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9169 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9170 SDValue Chain = NewNode.getValue(1);
9171
9172 if (VT.isFixedLengthVector())
9173 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9174 if (VT.isFloatingPoint())
9175 NewNode = DAG.getBitcast(VT, NewNode);
9176
9177 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9178
9179 return NewNode;
9180}
9181
9182 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9183 unsigned Type) {
9184 SmallVector<SDValue> Operands{Op->op_values()};
9185 Operands.erase(Operands.begin() + 1);
9186 processVCIXOperands(Op, Operands, DAG);
9187
9188 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9189}
9190
9191SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9192 SelectionDAG &DAG) const {
9193 unsigned IntNo = Op.getConstantOperandVal(1);
9194 switch (IntNo) {
9195 default:
9196 break;
9197 case Intrinsic::riscv_seg2_load:
9198 case Intrinsic::riscv_seg3_load:
9199 case Intrinsic::riscv_seg4_load:
9200 case Intrinsic::riscv_seg5_load:
9201 case Intrinsic::riscv_seg6_load:
9202 case Intrinsic::riscv_seg7_load:
9203 case Intrinsic::riscv_seg8_load: {
9204 SDLoc DL(Op);
9205 static const Intrinsic::ID VlsegInts[7] = {
9206 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9207 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9208 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9209 Intrinsic::riscv_vlseg8};
9210 unsigned NF = Op->getNumValues() - 1;
9211 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9212 MVT XLenVT = Subtarget.getXLenVT();
9213 MVT VT = Op->getSimpleValueType(0);
9214 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9215
9216 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9217 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9218 auto *Load = cast<MemIntrinsicSDNode>(Op);
9219 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9220 ContainerVTs.push_back(MVT::Other);
9221 SDVTList VTs = DAG.getVTList(ContainerVTs);
9222 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9223 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9224 Ops.push_back(Op.getOperand(2));
9225 Ops.push_back(VL);
9226 SDValue Result =
9227 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9228 Load->getMemoryVT(), Load->getMemOperand());
9229 SmallVector<SDValue, 9> Results;
9230 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9231 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9232 DAG, Subtarget));
9233 Results.push_back(Result.getValue(NF));
9234 return DAG.getMergeValues(Results, DL);
9235 }
9236 case Intrinsic::riscv_sf_vc_v_x_se:
9237 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9238 case Intrinsic::riscv_sf_vc_v_i_se:
9239 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9240 case Intrinsic::riscv_sf_vc_v_xv_se:
9241 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9242 case Intrinsic::riscv_sf_vc_v_iv_se:
9243 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9244 case Intrinsic::riscv_sf_vc_v_vv_se:
9245 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9246 case Intrinsic::riscv_sf_vc_v_fv_se:
9247 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9248 case Intrinsic::riscv_sf_vc_v_xvv_se:
9249 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9250 case Intrinsic::riscv_sf_vc_v_ivv_se:
9251 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9252 case Intrinsic::riscv_sf_vc_v_vvv_se:
9253 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9254 case Intrinsic::riscv_sf_vc_v_fvv_se:
9255 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9256 case Intrinsic::riscv_sf_vc_v_xvw_se:
9257 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9258 case Intrinsic::riscv_sf_vc_v_ivw_se:
9259 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9260 case Intrinsic::riscv_sf_vc_v_vvw_se:
9261 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9262 case Intrinsic::riscv_sf_vc_v_fvw_se:
9263 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9264 }
9265
9266 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9267}
9268
9269SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9270 SelectionDAG &DAG) const {
9271 unsigned IntNo = Op.getConstantOperandVal(1);
9272 switch (IntNo) {
9273 default:
9274 break;
9275 case Intrinsic::riscv_seg2_store:
9276 case Intrinsic::riscv_seg3_store:
9277 case Intrinsic::riscv_seg4_store:
9278 case Intrinsic::riscv_seg5_store:
9279 case Intrinsic::riscv_seg6_store:
9280 case Intrinsic::riscv_seg7_store:
9281 case Intrinsic::riscv_seg8_store: {
9282 SDLoc DL(Op);
9283 static const Intrinsic::ID VssegInts[] = {
9284 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9285 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9286 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9287 Intrinsic::riscv_vsseg8};
9288 // Operands are (chain, int_id, vec*, ptr, vl)
9289 unsigned NF = Op->getNumOperands() - 4;
9290 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9291 MVT XLenVT = Subtarget.getXLenVT();
9292 MVT VT = Op->getOperand(2).getSimpleValueType();
9293 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9294
9295 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9296 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9297 SDValue Ptr = Op->getOperand(NF + 2);
9298
9299 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9300 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9301 for (unsigned i = 0; i < NF; i++)
9302 Ops.push_back(convertToScalableVector(
9303 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9304 Ops.append({Ptr, VL});
9305
9306 return DAG.getMemIntrinsicNode(
9307 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9308 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9309 }
9310 case Intrinsic::riscv_sf_vc_xv_se:
9311 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9312 case Intrinsic::riscv_sf_vc_iv_se:
9313 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9314 case Intrinsic::riscv_sf_vc_vv_se:
9315 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9316 case Intrinsic::riscv_sf_vc_fv_se:
9317 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9318 case Intrinsic::riscv_sf_vc_xvv_se:
9319 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9320 case Intrinsic::riscv_sf_vc_ivv_se:
9321 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9322 case Intrinsic::riscv_sf_vc_vvv_se:
9323 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9324 case Intrinsic::riscv_sf_vc_fvv_se:
9325 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9326 case Intrinsic::riscv_sf_vc_xvw_se:
9327 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9328 case Intrinsic::riscv_sf_vc_ivw_se:
9329 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9330 case Intrinsic::riscv_sf_vc_vvw_se:
9331 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9332 case Intrinsic::riscv_sf_vc_fvw_se:
9333 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9334 }
9335
9336 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9337}
9338
9339static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9340 switch (ISDOpcode) {
9341 default:
9342 llvm_unreachable("Unhandled reduction");
9343 case ISD::VP_REDUCE_ADD:
9344 case ISD::VECREDUCE_ADD:
9345 return RISCVISD::VECREDUCE_ADD_VL;
9346 case ISD::VP_REDUCE_UMAX:
9347 case ISD::VECREDUCE_UMAX:
9348 return RISCVISD::VECREDUCE_UMAX_VL;
9349 case ISD::VP_REDUCE_SMAX:
9350 case ISD::VECREDUCE_SMAX:
9351 return RISCVISD::VECREDUCE_SMAX_VL;
9352 case ISD::VP_REDUCE_UMIN:
9353 case ISD::VECREDUCE_UMIN:
9354 return RISCVISD::VECREDUCE_UMIN_VL;
9355 case ISD::VP_REDUCE_SMIN:
9356 case ISD::VECREDUCE_SMIN:
9357 return RISCVISD::VECREDUCE_SMIN_VL;
9358 case ISD::VP_REDUCE_AND:
9359 case ISD::VECREDUCE_AND:
9360 return RISCVISD::VECREDUCE_AND_VL;
9361 case ISD::VP_REDUCE_OR:
9362 case ISD::VECREDUCE_OR:
9363 return RISCVISD::VECREDUCE_OR_VL;
9364 case ISD::VP_REDUCE_XOR:
9365 case ISD::VECREDUCE_XOR:
9366 return RISCVISD::VECREDUCE_XOR_VL;
9367 case ISD::VP_REDUCE_FADD:
9368 return RISCVISD::VECREDUCE_FADD_VL;
9369 case ISD::VP_REDUCE_SEQ_FADD:
9370 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9371 case ISD::VP_REDUCE_FMAX:
9372 case ISD::VP_REDUCE_FMAXIMUM:
9373 return RISCVISD::VECREDUCE_FMAX_VL;
9374 case ISD::VP_REDUCE_FMIN:
9375 case ISD::VP_REDUCE_FMINIMUM:
9376 return RISCVISD::VECREDUCE_FMIN_VL;
9377 }
9378
9379}
9380
9381SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9382 SelectionDAG &DAG,
9383 bool IsVP) const {
9384 SDLoc DL(Op);
9385 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9386 MVT VecVT = Vec.getSimpleValueType();
9387 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9388 Op.getOpcode() == ISD::VECREDUCE_OR ||
9389 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9390 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9391 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9392 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9393 "Unexpected reduction lowering");
9394
9395 MVT XLenVT = Subtarget.getXLenVT();
9396
9397 MVT ContainerVT = VecVT;
9398 if (VecVT.isFixedLengthVector()) {
9399 ContainerVT = getContainerForFixedLengthVector(VecVT);
9400 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9401 }
9402
9403 SDValue Mask, VL;
9404 if (IsVP) {
9405 Mask = Op.getOperand(2);
9406 VL = Op.getOperand(3);
9407 } else {
9408 std::tie(Mask, VL) =
9409 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9410 }
9411
9412 ISD::CondCode CC;
9413 switch (Op.getOpcode()) {
9414 default:
9415 llvm_unreachable("Unhandled reduction");
9416 case ISD::VECREDUCE_AND:
9417 case ISD::VP_REDUCE_AND: {
9418 // vcpop ~x == 0
9419 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9420 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9421 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9422 CC = ISD::SETEQ;
9423 break;
9424 }
9425 case ISD::VECREDUCE_OR:
9426 case ISD::VP_REDUCE_OR:
9427 // vcpop x != 0
9428 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9429 CC = ISD::SETNE;
9430 break;
9431 case ISD::VECREDUCE_XOR:
9432 case ISD::VP_REDUCE_XOR: {
9433 // ((vcpop x) & 1) != 0
9434 SDValue One = DAG.getConstant(1, DL, XLenVT);
9435 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9436 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9437 CC = ISD::SETNE;
9438 break;
9439 }
9440 }
9441
9442 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9443 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9444 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9445
9446 if (!IsVP)
9447 return SetCC;
9448
9449 // Now include the start value in the operation.
9450 // Note that we must return the start value when no elements are operated
9451 // upon. The vcpop instructions we've emitted in each case above will return
9452 // 0 for an inactive vector, and so we've already received the neutral value:
9453 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9454 // can simply include the start value.
9455 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9456 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9457}
9458
9459static bool isNonZeroAVL(SDValue AVL) {
9460 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9461 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9462 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9463 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9464}
9465
9466/// Helper to lower a reduction sequence of the form:
9467/// scalar = reduce_op vec, scalar_start
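///
/// Illustratively (exact vtype and registers depend on the configuration),
/// an add reduction of a v4i32 vector with a scalar start value in a0
/// roughly becomes:
///   vmv.s.x    v9, a0        ; start value into element 0 of an LMUL1 reg
///   vredsum.vs v9, v8, v9    ; v9[0] = v9[0] + sum(v8[0..3])
///   vmv.x.s    a0, v9        ; move the scalar result back out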
9468static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9469 SDValue StartValue, SDValue Vec, SDValue Mask,
9470 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9471 const RISCVSubtarget &Subtarget) {
9472 const MVT VecVT = Vec.getSimpleValueType();
9473 const MVT M1VT = getLMUL1VT(VecVT);
9474 const MVT XLenVT = Subtarget.getXLenVT();
9475 const bool NonZeroAVL = isNonZeroAVL(VL);
9476
9477 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9478 // or the original VT if fractional.
9479 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9480 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9481 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9482 // be the result of the reduction operation.
9483 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9484 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9485 DAG, Subtarget);
9486 if (M1VT != InnerVT)
9487 InitialValue =
9488 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9489 InitialValue, DAG.getVectorIdxConstant(0, DL));
9490 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9491 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9492 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9493 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9494 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9495 DAG.getVectorIdxConstant(0, DL));
9496}
9497
9498SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9499 SelectionDAG &DAG) const {
9500 SDLoc DL(Op);
9501 SDValue Vec = Op.getOperand(0);
9502 EVT VecEVT = Vec.getValueType();
9503
9504 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9505
9506 // Due to ordering in legalize types we may have a vector type that needs to
9507 // be split. Do that manually so we can get down to a legal type.
9508 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9509 TargetLowering::TypeSplitVector) {
9510 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9511 VecEVT = Lo.getValueType();
9512 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9513 }
9514
9515 // TODO: The type may need to be widened rather than split. Or widened before
9516 // it can be split.
9517 if (!isTypeLegal(VecEVT))
9518 return SDValue();
9519
9520 MVT VecVT = VecEVT.getSimpleVT();
9521 MVT VecEltVT = VecVT.getVectorElementType();
9522 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9523
9524 MVT ContainerVT = VecVT;
9525 if (VecVT.isFixedLengthVector()) {
9526 ContainerVT = getContainerForFixedLengthVector(VecVT);
9527 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9528 }
9529
9530 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9531
9532 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9533 switch (BaseOpc) {
9534 case ISD::AND:
9535 case ISD::OR:
9536 case ISD::UMAX:
9537 case ISD::UMIN:
9538 case ISD::SMAX:
9539 case ISD::SMIN:
9540 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9541 DAG.getVectorIdxConstant(0, DL));
9542 }
9543 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9544 Mask, VL, DL, DAG, Subtarget);
9545}
9546
9547// Given a reduction op, this function returns the matching reduction opcode,
9548// the vector SDValue and the scalar SDValue required to lower this to a
9549// RISCVISD node.
9550static std::tuple<unsigned, SDValue, SDValue>
9552 const RISCVSubtarget &Subtarget) {
9553 SDLoc DL(Op);
9554 auto Flags = Op->getFlags();
9555 unsigned Opcode = Op.getOpcode();
9556 switch (Opcode) {
9557 default:
9558 llvm_unreachable("Unhandled reduction");
9559 case ISD::VECREDUCE_FADD: {
9560 // Use positive zero if we can. It is cheaper to materialize.
9561 SDValue Zero =
9562 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9563 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9564 }
9565 case ISD::VECREDUCE_SEQ_FADD:
9566 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9567 Op.getOperand(0));
9568 case ISD::VECREDUCE_FMINIMUM:
9569 case ISD::VECREDUCE_FMAXIMUM:
9570 case ISD::VECREDUCE_FMIN:
9571 case ISD::VECREDUCE_FMAX: {
9572 SDValue Front =
9573 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9574 DAG.getVectorIdxConstant(0, DL));
9575 unsigned RVVOpc =
9576 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9577 ? RISCVISD::VECREDUCE_FMIN_VL
9578 : RISCVISD::VECREDUCE_FMAX_VL;
9579 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9580 }
9581 }
9582}
9583
9584SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9585 SelectionDAG &DAG) const {
9586 SDLoc DL(Op);
9587 MVT VecEltVT = Op.getSimpleValueType();
9588
9589 unsigned RVVOpcode;
9590 SDValue VectorVal, ScalarVal;
9591 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9592 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9593 MVT VecVT = VectorVal.getSimpleValueType();
9594
9595 MVT ContainerVT = VecVT;
9596 if (VecVT.isFixedLengthVector()) {
9597 ContainerVT = getContainerForFixedLengthVector(VecVT);
9598 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9599 }
9600
9601 MVT ResVT = Op.getSimpleValueType();
9602 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9603 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9604 VL, DL, DAG, Subtarget);
9605 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9606 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9607 return Res;
9608
9609 if (Op->getFlags().hasNoNaNs())
9610 return Res;
9611
9612 // Force output to NaN if any element is Nan.
9613 SDValue IsNan =
9614 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9615 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9616 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9617 MVT XLenVT = Subtarget.getXLenVT();
9618 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9619 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9620 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9621 return DAG.getSelect(
9622 DL, ResVT, NoNaNs, Res,
9623 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
9624}
9625
9626SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9627 SelectionDAG &DAG) const {
9628 SDLoc DL(Op);
9629 unsigned Opc = Op.getOpcode();
9630 SDValue Start = Op.getOperand(0);
9631 SDValue Vec = Op.getOperand(1);
9632 EVT VecEVT = Vec.getValueType();
9633 MVT XLenVT = Subtarget.getXLenVT();
9634
9635 // TODO: The type may need to be widened rather than split. Or widened before
9636 // it can be split.
9637 if (!isTypeLegal(VecEVT))
9638 return SDValue();
9639
9640 MVT VecVT = VecEVT.getSimpleVT();
9641 unsigned RVVOpcode = getRVVReductionOp(Opc);
9642
9643 if (VecVT.isFixedLengthVector()) {
9644 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9645 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9646 }
9647
9648 SDValue VL = Op.getOperand(3);
9649 SDValue Mask = Op.getOperand(2);
9650 SDValue Res =
9651 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9652 Vec, Mask, VL, DL, DAG, Subtarget);
9653 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9654 Op->getFlags().hasNoNaNs())
9655 return Res;
9656
9657 // Propagate NaNs.
9658 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9659 // Check if any of the elements in Vec is NaN.
9660 SDValue IsNaN = DAG.getNode(
9661 RISCVISD::SETCC_VL, DL, PredVT,
9662 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9663 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9664 // Check if the start value is NaN.
9665 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9666 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
9667 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
9668 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9669 MVT ResVT = Res.getSimpleValueType();
9670 return DAG.getSelect(
9671 DL, ResVT, NoNaNs, Res,
9672 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
9673}
9674
9675SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9676 SelectionDAG &DAG) const {
9677 SDValue Vec = Op.getOperand(0);
9678 SDValue SubVec = Op.getOperand(1);
9679 MVT VecVT = Vec.getSimpleValueType();
9680 MVT SubVecVT = SubVec.getSimpleValueType();
9681
9682 SDLoc DL(Op);
9683 MVT XLenVT = Subtarget.getXLenVT();
9684 unsigned OrigIdx = Op.getConstantOperandVal(2);
9685 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9686
9687 // We don't have the ability to slide mask vectors up indexed by their i1
9688 // elements; the smallest we can do is i8. Often we are able to bitcast to
9689 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9690 // into a scalable one, we might not necessarily have enough scalable
9691 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
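// (For instance, inserting a v16i1 subvector at index 8 into an nxv8i1
// vector can be re-expressed below as inserting v2i8 at index 1 into
// nxv1i8.)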
9692 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9693 (OrigIdx != 0 || !Vec.isUndef())) {
9694 if (VecVT.getVectorMinNumElements() >= 8 &&
9695 SubVecVT.getVectorMinNumElements() >= 8) {
9696 assert(OrigIdx % 8 == 0 && "Invalid index");
9697 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9698 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9699 "Unexpected mask vector lowering");
9700 OrigIdx /= 8;
9701 SubVecVT =
9702 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9703 SubVecVT.isScalableVector());
9704 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9705 VecVT.isScalableVector());
9706 Vec = DAG.getBitcast(VecVT, Vec);
9707 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9708 } else {
9709 // We can't slide this mask vector up indexed by its i1 elements.
9710 // This poses a problem when we wish to insert a scalable vector which
9711 // can't be re-expressed as a larger type. Just choose the slow path and
9712 // extend to a larger type, then truncate back down.
9713 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9714 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9715 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9716 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9717 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9718 Op.getOperand(2));
9719 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9720 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9721 }
9722 }
9723
9724 // If the subvector is a fixed-length type and we don't know VLEN
9725 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9726 // don't know which register of an LMUL group contains the specific subvector
9727 // as we only know the minimum register size. Therefore we must slide the
9728 // vector group up the full amount.
9729 const auto VLen = Subtarget.getRealVLen();
9730 if (SubVecVT.isFixedLengthVector() && !VLen) {
9731 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9732 return Op;
9733 MVT ContainerVT = VecVT;
9734 if (VecVT.isFixedLengthVector()) {
9735 ContainerVT = getContainerForFixedLengthVector(VecVT);
9736 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9737 }
9738
9739 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9740 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9741 DAG.getUNDEF(ContainerVT), SubVec,
9742 DAG.getVectorIdxConstant(0, DL));
9743 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9744 return DAG.getBitcast(Op.getValueType(), SubVec);
9745 }
9746
9747 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9748 DAG.getUNDEF(ContainerVT), SubVec,
9749 DAG.getVectorIdxConstant(0, DL));
9750 SDValue Mask =
9751 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9752 // Set the vector length to only the number of elements we care about. Note
9753 // that for slideup this includes the offset.
9754 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9755 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
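// For illustration: inserting a 4-element subvector at index 2 gives
// EndIndex = 6, so the slideup below runs with VL = 6 and only writes
// destination elements [2, 6).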
9756
9757 // Use tail agnostic policy if we're inserting over Vec's tail.
9758 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9759 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9760 Policy = RISCVII::TAIL_AGNOSTIC;
9761
9762 // If we're inserting into the lowest elements, use a tail undisturbed
9763 // vmv.v.v.
9764 if (OrigIdx == 0) {
9765 SubVec =
9766 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9767 } else {
9768 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9769 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9770 SlideupAmt, Mask, VL, Policy);
9771 }
9772
9773 if (VecVT.isFixedLengthVector())
9774 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9775 return DAG.getBitcast(Op.getValueType(), SubVec);
9776 }
9777
9778 MVT ContainerVecVT = VecVT;
9779 if (VecVT.isFixedLengthVector()) {
9780 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
9781 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
9782 }
9783
9784 MVT ContainerSubVecVT = SubVecVT;
9785 if (SubVecVT.isFixedLengthVector()) {
9786 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9787 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
9788 }
9789
9790 unsigned SubRegIdx;
9791 ElementCount RemIdx;
9792 // insert_subvector scales the index by vscale if the subvector is scalable,
9793 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9794 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
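// For illustration (assuming VLEN = 128): Vscale = 128 / RVVBitsPerBlock = 2,
// so a fixed OrigIdx of 5 is decomposed using 5 / 2 = 2, and the leftover
// element (5 % 2 = 1) is folded back into RemIdx below.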
9795 if (SubVecVT.isFixedLengthVector()) {
9796 assert(VLen);
9797 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9798 auto Decompose =
9799 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9800 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9801 SubRegIdx = Decompose.first;
9802 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9803 (OrigIdx % Vscale));
9804 } else {
9805 auto Decompose =
9806 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9807 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
9808 SubRegIdx = Decompose.first;
9809 RemIdx = ElementCount::getScalable(Decompose.second);
9810 }
9811
9812 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9813 assert(isPowerOf2_64(
9814 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
9815 bool ExactlyVecRegSized =
9816 Subtarget.expandVScale(SubVecVT.getSizeInBits())
9817 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
9818
9819 // 1. If the Idx has been completely eliminated and this subvector's size is
9820 // a vector register or a multiple thereof, or the surrounding elements are
9821 // undef, then this is a subvector insert which naturally aligns to a vector
9822 // register. These can easily be handled using subregister manipulation.
9823 // 2. If the subvector isn't an exact multiple of a valid register group size,
9824 // then the insertion must preserve the undisturbed elements of the register.
9825 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9826 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
9827 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
9828 // of that LMUL=1 type back into the larger vector (resolving to another
9829 // subregister operation). See below for how our VSLIDEUP works. We go via a
9830 // LMUL=1 type to avoid allocating a large register group to hold our
9831 // subvector.
9832 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
9833 if (SubVecVT.isFixedLengthVector()) {
9834 // We may get NoSubRegister if inserting at index 0 and the subvec
9835 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
9836 if (SubRegIdx == RISCV::NoSubRegister) {
9837 assert(OrigIdx == 0);
9838 return Op;
9839 }
9840
9841 SDValue Insert =
9842 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
9843 if (VecVT.isFixedLengthVector())
9844 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
9845 return Insert;
9846 }
9847 return Op;
9848 }
9849
9850 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9851 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
9852 // (in our case undisturbed). This means we can set up a subvector insertion
9853 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9854 // size of the subvector.
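// For illustration: with OFFSET = 2 and a 3-element subvector, VL = 5; the
// destination keeps elements 0..1, receives the subvector in elements 2..4,
// and elements from 5 up to VLMAX follow the tail policy.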
9855 MVT InterSubVT = ContainerVecVT;
9856 SDValue AlignedExtract = Vec;
9857 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
9858 if (SubVecVT.isFixedLengthVector())
9859 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
9860 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
9861 InterSubVT = getLMUL1VT(ContainerVecVT);
9862 // Extract a subvector equal to the nearest full vector register type. This
9863 // should resolve to a EXTRACT_SUBREG instruction.
9864 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9865 DAG.getVectorIdxConstant(AlignedIdx, DL));
9866 }
9867
9868 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9869 DAG.getUNDEF(InterSubVT), SubVec,
9870 DAG.getVectorIdxConstant(0, DL));
9871
9872 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
9873
9874 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
9875 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
9876
9877 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9878 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9879 if (Subtarget.expandVScale(EndIndex) ==
9880 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
9881 Policy = RISCVII::TAIL_AGNOSTIC;
9882
9883 // If we're inserting into the lowest elements, use a tail undisturbed
9884 // vmv.v.v.
9885 if (RemIdx.isZero()) {
9886 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9887 SubVec, VL);
9888 } else {
9889 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
9890
9891 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9892 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9893
9894 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9895 SlideupAmt, Mask, VL, Policy);
9896 }
9897
9898 // If required, insert this subvector back into the correct vector register.
9899 // This should resolve to an INSERT_SUBREG instruction.
9900 if (ContainerVecVT.bitsGT(InterSubVT))
9901 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
9902 DAG.getVectorIdxConstant(AlignedIdx, DL));
9903
9904 if (VecVT.isFixedLengthVector())
9905 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9906
9907 // We might have bitcast from a mask type: cast back to the original type if
9908 // required.
9909 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9910}
9911
9912SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9913 SelectionDAG &DAG) const {
9914 SDValue Vec = Op.getOperand(0);
9915 MVT SubVecVT = Op.getSimpleValueType();
9916 MVT VecVT = Vec.getSimpleValueType();
9917
9918 SDLoc DL(Op);
9919 MVT XLenVT = Subtarget.getXLenVT();
9920 unsigned OrigIdx = Op.getConstantOperandVal(1);
9921 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9922
9923 // We don't have the ability to slide mask vectors down indexed by their i1
9924 // elements; the smallest we can do is i8. Often we are able to bitcast to
9925 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9926 // from a scalable one, we might not necessarily have enough scalable
9927 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
9928 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9929 if (VecVT.getVectorMinNumElements() >= 8 &&
9930 SubVecVT.getVectorMinNumElements() >= 8) {
9931 assert(OrigIdx % 8 == 0 && "Invalid index");
9932 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9933 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9934 "Unexpected mask vector lowering");
9935 OrigIdx /= 8;
9936 SubVecVT =
9937 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9938 SubVecVT.isScalableVector());
9939 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9940 VecVT.isScalableVector());
9941 Vec = DAG.getBitcast(VecVT, Vec);
9942 } else {
9943 // We can't slide this mask vector down, indexed by its i1 elements.
9944 // This poses a problem when we wish to extract a scalable vector which
9945 // can't be re-expressed as a larger type. Just choose the slow path and
9946 // extend to a larger type, then truncate back down.
9947 // TODO: We could probably improve this when extracting certain fixed-length
9948 // vectors from fixed-length vectors, where we can extract as i8 and shift the
9949 // correct element right to reach the desired subvector?
9950 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9951 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9952 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9953 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9954 Op.getOperand(1));
9955 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9956 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9957 }
9958 }
9959
9960 // With an index of 0 this is a cast-like subvector, which can be performed
9961 // with subregister operations.
9962 if (OrigIdx == 0)
9963 return Op;
9964
9965 const auto VLen = Subtarget.getRealVLen();
9966
9967 // If the subvector is a fixed-length type and we don't know VLEN
9968 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9969 // don't know which register of an LMUL group contains the specific subvector
9970 // as we only know the minimum register size. Therefore we must slide the
9971 // vector group down the full amount.
9972 if (SubVecVT.isFixedLengthVector() && !VLen) {
9973 MVT ContainerVT = VecVT;
9974 if (VecVT.isFixedLengthVector()) {
9975 ContainerVT = getContainerForFixedLengthVector(VecVT);
9976 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9977 }
9978
9979 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9980 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9981 if (auto ShrunkVT =
9982 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9983 ContainerVT = *ShrunkVT;
9984 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9985 DAG.getVectorIdxConstant(0, DL));
9986 }
9987
9988 SDValue Mask =
9989 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9990 // Set the vector length to only the number of elements we care about. This
9991 // avoids sliding down elements we're going to discard straight away.
9992 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
9993 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
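// For illustration: extracting a 2-element subvector at index 6 slides the
// source down by 6 with VL = 2, and the cast-like extract below then takes
// elements 0..1 of the slid result.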
9994 SDValue Slidedown =
9995 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9996 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9997 // Now we can use a cast-like subvector extract to get the result.
9998 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9999 DAG.getVectorIdxConstant(0, DL));
10000 return DAG.getBitcast(Op.getValueType(), Slidedown);
10001 }
10002
10003 if (VecVT.isFixedLengthVector()) {
10004 VecVT = getContainerForFixedLengthVector(VecVT);
10005 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10006 }
10007
10008 MVT ContainerSubVecVT = SubVecVT;
10009 if (SubVecVT.isFixedLengthVector())
10010 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10011
10012 unsigned SubRegIdx;
10013 ElementCount RemIdx;
10014 // extract_subvector scales the index by vscale if the subvector is scalable,
10015 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10016 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10017 if (SubVecVT.isFixedLengthVector()) {
10018 assert(VLen);
10019 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10020 auto Decompose =
10021 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10022 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10023 SubRegIdx = Decompose.first;
10024 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10025 (OrigIdx % Vscale));
10026 } else {
10027 auto Decompose =
10028 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10029 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10030 SubRegIdx = Decompose.first;
10031 RemIdx = ElementCount::getScalable(Decompose.second);
10032 }
10033
10034 // If the Idx has been completely eliminated then this is a subvector extract
10035 // which naturally aligns to a vector register. These can easily be handled
10036 // using subregister manipulation.
10037 if (RemIdx.isZero()) {
10038 if (SubVecVT.isFixedLengthVector()) {
10039 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10040 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10041 }
10042 return Op;
10043 }
10044
10045 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10046 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10047 // divide exactly.
10048 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10049 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10050
10051 // If the vector type is an LMUL-group type, extract a subvector equal to the
10052 // nearest full vector register type.
10053 MVT InterSubVT = VecVT;
10054 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10055 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10056 // we should have successfully decomposed the extract into a subregister.
10057 assert(SubRegIdx != RISCV::NoSubRegister);
10058 InterSubVT = getLMUL1VT(VecVT);
10059 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10060 }
10061
10062 // Slide this vector register down by the desired number of elements in order
10063 // to place the desired subvector starting at element 0.
10064 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10065 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10066 if (SubVecVT.isFixedLengthVector())
10067 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10068 SDValue Slidedown =
10069 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10070 Vec, SlidedownAmt, Mask, VL);
10071
10072 // Now the vector is in the right position, extract our final subvector. This
10073 // should resolve to a COPY.
10074 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10075 DAG.getVectorIdxConstant(0, DL));
10076
10077 // We might have bitcast from a mask type: cast back to the original type if
10078 // required.
10079 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10080}
10081
10082// Widen a vector's operands to i8, then truncate its results back to the
10083// original type, typically i1. All operand and result types must be the same.
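// For illustration: a vector_deinterleave of two nxv4i1 operands is
// zero-extended to nxv4i8, re-emitted at i8, and each i8 result is converted
// back to a mask by comparing it against zero with SETNE.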
10084 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10085 SelectionDAG &DAG) {
10086 MVT VT = N.getSimpleValueType();
10087 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10088 SmallVector<SDValue, 4> WideOps;
10089 for (SDValue Op : N->ops()) {
10090 assert(Op.getSimpleValueType() == VT &&
10091 "Operands and result must be same type");
10092 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10093 }
10094
10095 unsigned NumVals = N->getNumValues();
10096
10097 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10098 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10099 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10100 SmallVector<SDValue, 4> TruncVals;
10101 for (unsigned I = 0; I < NumVals; I++) {
10102 TruncVals.push_back(
10103 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10104 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10105 }
10106
10107 if (TruncVals.size() > 1)
10108 return DAG.getMergeValues(TruncVals, DL);
10109 return TruncVals.front();
10110}
10111
10112SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10113 SelectionDAG &DAG) const {
10114 SDLoc DL(Op);
10115 MVT VecVT = Op.getSimpleValueType();
10116
10117 assert(VecVT.isScalableVector() &&
10118 "vector_interleave on non-scalable vector!");
10119
10120 // 1 bit element vectors need to be widened to e8
10121 if (VecVT.getVectorElementType() == MVT::i1)
10122 return widenVectorOpsToi8(Op, DL, DAG);
10123
10124 // If the VT is LMUL=8, we need to split and reassemble.
10125 if (VecVT.getSizeInBits().getKnownMinValue() ==
10126 (8 * RISCV::RVVBitsPerBlock)) {
10127 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10128 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10129 EVT SplitVT = Op0Lo.getValueType();
10130
10131 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10132 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10133 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10134 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10135
10136 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10137 ResLo.getValue(0), ResHi.getValue(0));
10138 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10139 ResHi.getValue(1));
10140 return DAG.getMergeValues({Even, Odd}, DL);
10141 }
10142
10143 // Concatenate the two vectors as one vector to deinterleave
10144 MVT ConcatVT =
10145 MVT::getVectorVT(VecVT.getVectorElementType(),
10146 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10147 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10148 Op.getOperand(0), Op.getOperand(1));
10149
10150 // We want to operate on all lanes, so get the mask and VL for it
10151 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10152 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10153
10154 // We can deinterleave through vnsrl.wi if the element type is smaller than
10155 // ELEN
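// Rough sketch of the trick: for SEW=8 the concatenated data can be viewed as
// i16 elements, each packing an (even, odd) byte pair; vnsrl.wi with shift 0
// then yields the even elements and shift 8 yields the odd elements.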
10156 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10157 SDValue Even =
10158 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10159 SDValue Odd =
10160 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10161 return DAG.getMergeValues({Even, Odd}, DL);
10162 }
10163
10164 // For the indices, use the same SEW to avoid an extra vsetvli
10165 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10166 // Create a vector of even indices {0, 2, 4, ...}
10167 SDValue EvenIdx =
10168 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10169 // Create a vector of odd indices {1, 3, 5, ... }
10170 SDValue OddIdx =
10171 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10172
10173 // Gather the even and odd elements into two separate vectors
10174 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10175 Concat, EvenIdx, Passthru, Mask, VL);
10176 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10177 Concat, OddIdx, Passthru, Mask, VL);
10178
10179 // Extract the result half of the gather for even and odd
10180 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10181 DAG.getVectorIdxConstant(0, DL));
10182 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10183 DAG.getVectorIdxConstant(0, DL));
10184
10185 return DAG.getMergeValues({Even, Odd}, DL);
10186}
10187
10188SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10189 SelectionDAG &DAG) const {
10190 SDLoc DL(Op);
10191 MVT VecVT = Op.getSimpleValueType();
10192
10193 assert(VecVT.isScalableVector() &&
10194 "vector_interleave on non-scalable vector!");
10195
10196 // i1 vectors need to be widened to i8
10197 if (VecVT.getVectorElementType() == MVT::i1)
10198 return widenVectorOpsToi8(Op, DL, DAG);
10199
10200 MVT XLenVT = Subtarget.getXLenVT();
10201 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10202
10203 // If the VT is LMUL=8, we need to split and reassemble.
10204 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10205 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10206 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10207 EVT SplitVT = Op0Lo.getValueType();
10208
10209 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10210 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10211 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10212 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10213
10214 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10215 ResLo.getValue(0), ResLo.getValue(1));
10216 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10217 ResHi.getValue(0), ResHi.getValue(1));
10218 return DAG.getMergeValues({Lo, Hi}, DL);
10219 }
10220
10221 SDValue Interleaved;
10222
10223 // If the element type is smaller than ELEN, then we can interleave with
10224 // vwaddu.vv and vwmaccu.vx
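// Sketch of the arithmetic used by getWideningInterleave: vwaddu.vv forms
// zext(a) + zext(b) in 2*SEW-bit lanes, and vwmaccu.vx with the scalar
// (2^SEW - 1) adds (2^SEW - 1) * zext(b), giving zext(a) + 2^SEW * zext(b);
// each wide lane thus holds a in its low half and b in its high half, which
// reads back as the interleaved sequence at the original SEW.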
10225 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10226 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10227 DAG, Subtarget);
10228 } else {
10229 // Otherwise, fall back to using vrgatherei16.vv
10230 MVT ConcatVT =
10231 MVT::getVectorVT(VecVT.getVectorElementType(),
10232 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10233 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10234 Op.getOperand(0), Op.getOperand(1));
10235
10236 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10237
10238 // 0 1 2 3 4 5 6 7 ...
10239 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10240
10241 // 1 1 1 1 1 1 1 1 ...
10242 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10243
10244 // 1 0 1 0 1 0 1 0 ...
10245 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10246 OddMask = DAG.getSetCC(
10247 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10248 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10249 ISD::CondCode::SETNE);
10250
10251 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10252
10253 // Build up the index vector for interleaving the concatenated vector
10254 // 0 0 1 1 2 2 3 3 ...
10255 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10256 // 0 n 1 n+1 2 n+2 3 n+3 ...
10257 Idx =
10258 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10259
10260 // Then perform the interleave
10261 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10262 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10263 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10264 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10265 }
10266
10267 // Extract the two halves from the interleaved result
10268 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10269 DAG.getVectorIdxConstant(0, DL));
10270 SDValue Hi = DAG.getNode(
10271 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10272 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10273
10274 return DAG.getMergeValues({Lo, Hi}, DL);
10275}
10276
10277 // Lower step_vector to the vid instruction. Any non-identity step value must
10278 // be accounted for by manual expansion.
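// For illustration: a step of 4 lowers to vid.v followed by a shift left by 2,
// while a step of 3 lowers to vid.v followed by a multiply by a splat of 3.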
10279SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10280 SelectionDAG &DAG) const {
10281 SDLoc DL(Op);
10282 MVT VT = Op.getSimpleValueType();
10283 assert(VT.isScalableVector() && "Expected scalable vector");
10284 MVT XLenVT = Subtarget.getXLenVT();
10285 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10286 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10287 uint64_t StepValImm = Op.getConstantOperandVal(0);
10288 if (StepValImm != 1) {
10289 if (isPowerOf2_64(StepValImm)) {
10290 SDValue StepVal =
10291 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10292 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10293 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10294 } else {
10295 SDValue StepVal = lowerScalarSplat(
10296 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10297 VL, VT, DL, DAG, Subtarget);
10298 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10299 }
10300 }
10301 return StepVec;
10302}
10303
10304// Implement vector_reverse using vrgather.vv with indices determined by
10305// subtracting the id of each element from (VLMAX-1). This will convert
10306// the indices like so:
10307// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10308// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10309SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10310 SelectionDAG &DAG) const {
10311 SDLoc DL(Op);
10312 MVT VecVT = Op.getSimpleValueType();
10313 if (VecVT.getVectorElementType() == MVT::i1) {
10314 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10315 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10316 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10317 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10318 }
10319
10320 MVT ContainerVT = VecVT;
10321 SDValue Vec = Op.getOperand(0);
10322 if (VecVT.isFixedLengthVector()) {
10323 ContainerVT = getContainerForFixedLengthVector(VecVT);
10324 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10325 }
10326
10327 unsigned EltSize = ContainerVT.getScalarSizeInBits();
10328 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
10329 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10330 unsigned MaxVLMAX =
10331 VecVT.isFixedLengthVector()
10332 ? VecVT.getVectorNumElements()
10333 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10334
10335 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10336 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
10337
10338 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10339 // to use vrgatherei16.vv.
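// For illustration: at SEW=8 and LMUL=8 with VLEN=512, VLMAX is 512, so gather
// indices up to 511 no longer fit in 8 bits; at VLEN=256 the largest index is
// 255 and plain vrgather.vv still suffices.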
10340 if (MaxVLMAX > 256 && EltSize == 8) {
10341 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10342 // Reverse each half, then reassemble them in reverse order.
10343 // NOTE: It's also possible that after splitting, VLMAX no longer
10344 // requires vrgatherei16.vv.
10345 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10346 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10347 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10348 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10349 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10350 // Reassemble the low and high pieces reversed.
10351 // FIXME: This is a CONCAT_VECTORS.
10352 SDValue Res =
10353 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10354 DAG.getVectorIdxConstant(0, DL));
10355 return DAG.getNode(
10356 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10357 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10358 }
10359
10360 // Just promote the int type to i16 which will double the LMUL.
10361 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
10362 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10363 }
10364
10365 // At LMUL > 1, do the index computation in 16 bits to reduce register
10366 // pressure.
10367 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
10368 IntVT.bitsGT(getLMUL1VT(IntVT))) {
10369 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
10370 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10371 IntVT = IntVT.changeVectorElementType(MVT::i16);
10372 }
10373
10374 MVT XLenVT = Subtarget.getXLenVT();
10375 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10376
10377 // Calculate VLMAX-1 for the desired SEW.
10378 SDValue VLMinus1 = DAG.getNode(
10379 ISD::SUB, DL, XLenVT,
10380 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
10381 DAG.getConstant(1, DL, XLenVT));
10382
10383 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10384 bool IsRV32E64 =
10385 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10386 SDValue SplatVL;
10387 if (!IsRV32E64)
10388 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10389 else
10390 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10391 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10392
10393 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10394 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10395 DAG.getUNDEF(IntVT), Mask, VL);
10396
10397 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
10398 DAG.getUNDEF(ContainerVT), Mask, VL);
10399 if (VecVT.isFixedLengthVector())
10400 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
10401 return Gather;
10402}
10403
10404SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10405 SelectionDAG &DAG) const {
10406 SDLoc DL(Op);
10407 SDValue V1 = Op.getOperand(0);
10408 SDValue V2 = Op.getOperand(1);
10409 MVT XLenVT = Subtarget.getXLenVT();
10410 MVT VecVT = Op.getSimpleValueType();
10411
10412 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10413
10414 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10415 SDValue DownOffset, UpOffset;
10416 if (ImmValue >= 0) {
10417 // The operand is a TargetConstant, we need to rebuild it as a regular
10418 // constant.
10419 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10420 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10421 } else {
10422 // The operand is a TargetConstant, we need to rebuild it as a regular
10423 // constant rather than negating the original operand.
10424 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10425 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10426 }
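// For illustration: splicing (V1, V2) with offset 2 and VLMAX = 8 gives
// DownOffset = 2 and UpOffset = 6; V1 is slid down by 2 (keeping 6 live
// elements) and V2 is then slid up by 6, producing {V1[2..7], V2[0..1]}.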
10427
10428 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10429
10430 SDValue SlideDown =
10431 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10432 DownOffset, TrueMask, UpOffset);
10433 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10434 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10435 RISCVII::TAIL_AGNOSTIC);
10436}
10437
10438SDValue
10439RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10440 SelectionDAG &DAG) const {
10441 SDLoc DL(Op);
10442 auto *Load = cast<LoadSDNode>(Op);
10443
10444 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10445 Load->getMemoryVT(),
10446 *Load->getMemOperand()) &&
10447 "Expecting a correctly-aligned load");
10448
10449 MVT VT = Op.getSimpleValueType();
10450 MVT XLenVT = Subtarget.getXLenVT();
10451 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10452
10453 // If we know the exact VLEN and our fixed length vector completely fills
10454 // the container, use a whole register load instead.
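// For illustration (assuming the usual nxv2i32 container for v4i32 with
// Zvl128b): if VLEN is known to be exactly 128, a v4i32 load fills the
// container completely (VLMAX = 4), so a plain whole-register load is emitted
// instead of a VL-limited vle.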
10455 const auto [MinVLMAX, MaxVLMAX] =
10456 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10457 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10458 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10459 MachineMemOperand *MMO = Load->getMemOperand();
10460 SDValue NewLoad =
10461 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10462 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10463 MMO->getAAInfo(), MMO->getRanges());
10464 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10465 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10466 }
10467
10468 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10469
10470 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10471 SDValue IntID = DAG.getTargetConstant(
10472 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10473 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10474 if (!IsMaskOp)
10475 Ops.push_back(DAG.getUNDEF(ContainerVT));
10476 Ops.push_back(Load->getBasePtr());
10477 Ops.push_back(VL);
10478 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10479 SDValue NewLoad =
10480 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10481 Load->getMemoryVT(), Load->getMemOperand());
10482
10483 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10484 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10485}
10486
10487SDValue
10488RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10489 SelectionDAG &DAG) const {
10490 SDLoc DL(Op);
10491 auto *Store = cast<StoreSDNode>(Op);
10492
10493 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10494 Store->getMemoryVT(),
10495 *Store->getMemOperand()) &&
10496 "Expecting a correctly-aligned store");
10497
10498 SDValue StoreVal = Store->getValue();
10499 MVT VT = StoreVal.getSimpleValueType();
10500 MVT XLenVT = Subtarget.getXLenVT();
10501
10502 // If the size is less than a byte, we need to pad with zeros to make a byte.
10503 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10504 VT = MVT::v8i1;
10505 StoreVal =
10506 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10507 StoreVal, DAG.getVectorIdxConstant(0, DL));
10508 }
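// For illustration: a v4i1 store value is first inserted into a zeroed v8i1 so
// that a whole byte can be written with vsm.v.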
10509
10510 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10511
10512 SDValue NewValue =
10513 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10514
10515 // If we know the exact VLEN and our fixed length vector completely fills
10516 // the container, use a whole register store instead.
10517 const auto [MinVLMAX, MaxVLMAX] =
10518 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10519 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10520 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10521 MachineMemOperand *MMO = Store->getMemOperand();
10522 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10523 MMO->getPointerInfo(), MMO->getBaseAlign(),
10524 MMO->getFlags(), MMO->getAAInfo());
10525 }
10526
10527 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10528
10529 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10530 SDValue IntID = DAG.getTargetConstant(
10531 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10532 return DAG.getMemIntrinsicNode(
10533 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10534 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10535 Store->getMemoryVT(), Store->getMemOperand());
10536}
10537
10538SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10539 SelectionDAG &DAG) const {
10540 SDLoc DL(Op);
10541 MVT VT = Op.getSimpleValueType();
10542
10543 const auto *MemSD = cast<MemSDNode>(Op);
10544 EVT MemVT = MemSD->getMemoryVT();
10545 MachineMemOperand *MMO = MemSD->getMemOperand();
10546 SDValue Chain = MemSD->getChain();
10547 SDValue BasePtr = MemSD->getBasePtr();
10548
10549 SDValue Mask, PassThru, VL;
10550 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10551 Mask = VPLoad->getMask();
10552 PassThru = DAG.getUNDEF(VT);
10553 VL = VPLoad->getVectorLength();
10554 } else {
10555 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10556 Mask = MLoad->getMask();
10557 PassThru = MLoad->getPassThru();
10558 }
10559
10560 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10561
10562 MVT XLenVT = Subtarget.getXLenVT();
10563
10564 MVT ContainerVT = VT;
10565 if (VT.isFixedLengthVector()) {
10566 ContainerVT = getContainerForFixedLengthVector(VT);
10567 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10568 if (!IsUnmasked) {
10569 MVT MaskVT = getMaskTypeFor(ContainerVT);
10570 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10571 }
10572 }
10573
10574 if (!VL)
10575 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10576
10577 unsigned IntID =
10578 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10579 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10580 if (IsUnmasked)
10581 Ops.push_back(DAG.getUNDEF(ContainerVT));
10582 else
10583 Ops.push_back(PassThru);
10584 Ops.push_back(BasePtr);
10585 if (!IsUnmasked)
10586 Ops.push_back(Mask);
10587 Ops.push_back(VL);
10588 if (!IsUnmasked)
10589 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10590
10591 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10592
10593 SDValue Result =
10594 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10595 Chain = Result.getValue(1);
10596
10597 if (VT.isFixedLengthVector())
10598 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10599
10600 return DAG.getMergeValues({Result, Chain}, DL);
10601}
10602
10603SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10604 SelectionDAG &DAG) const {
10605 SDLoc DL(Op);
10606
10607 const auto *MemSD = cast<MemSDNode>(Op);
10608 EVT MemVT = MemSD->getMemoryVT();
10609 MachineMemOperand *MMO = MemSD->getMemOperand();
10610 SDValue Chain = MemSD->getChain();
10611 SDValue BasePtr = MemSD->getBasePtr();
10612 SDValue Val, Mask, VL;
10613
10614 bool IsCompressingStore = false;
10615 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10616 Val = VPStore->getValue();
10617 Mask = VPStore->getMask();
10618 VL = VPStore->getVectorLength();
10619 } else {
10620 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10621 Val = MStore->getValue();
10622 Mask = MStore->getMask();
10623 IsCompressingStore = MStore->isCompressingStore();
10624 }
10625
10626 bool IsUnmasked =
10627 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10628
10629 MVT VT = Val.getSimpleValueType();
10630 MVT XLenVT = Subtarget.getXLenVT();
10631
10632 MVT ContainerVT = VT;
10633 if (VT.isFixedLengthVector()) {
10634 ContainerVT = getContainerForFixedLengthVector(VT);
10635
10636 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10637 if (!IsUnmasked || IsCompressingStore) {
10638 MVT MaskVT = getMaskTypeFor(ContainerVT);
10639 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10640 }
10641 }
10642
10643 if (!VL)
10644 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10645
10646 if (IsCompressingStore) {
10647 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10648 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10649 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10650 VL =
10651 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10652 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10653 }
10654
10655 unsigned IntID =
10656 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10657 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10658 Ops.push_back(Val);
10659 Ops.push_back(BasePtr);
10660 if (!IsUnmasked)
10661 Ops.push_back(Mask);
10662 Ops.push_back(VL);
10663
10664 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10665 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10666}
10667
10668SDValue
10669RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10670 SelectionDAG &DAG) const {
10671 MVT InVT = Op.getOperand(0).getSimpleValueType();
10672 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10673
10674 MVT VT = Op.getSimpleValueType();
10675
10676 SDValue Op1 =
10677 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10678 SDValue Op2 =
10679 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10680
10681 SDLoc DL(Op);
10682 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10683 DAG, Subtarget);
10684 MVT MaskVT = getMaskTypeFor(ContainerVT);
10685
10686 SDValue Cmp =
10687 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10688 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10689
10690 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10691}
10692
10693SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10694 SelectionDAG &DAG) const {
10695 unsigned Opc = Op.getOpcode();
10696 SDLoc DL(Op);
10697 SDValue Chain = Op.getOperand(0);
10698 SDValue Op1 = Op.getOperand(1);
10699 SDValue Op2 = Op.getOperand(2);
10700 SDValue CC = Op.getOperand(3);
10701 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10702 MVT VT = Op.getSimpleValueType();
10703 MVT InVT = Op1.getSimpleValueType();
10704
10705 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10706 // condition codes.
10707 if (Opc == ISD::STRICT_FSETCCS) {
10708 // Expand strict_fsetccs(x, oeq) to
10709 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10710 SDVTList VTList = Op->getVTList();
10711 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10712 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10713 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10714 Op2, OLECCVal);
10715 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10716 Op1, OLECCVal);
10717 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10718 Tmp1.getValue(1), Tmp2.getValue(1));
10719 // Tmp1 and Tmp2 might be the same node.
10720 if (Tmp1 != Tmp2)
10721 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10722 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10723 }
10724
10725 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10726 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10727 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10728 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10729 Op2, OEQCCVal);
10730 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10731 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10732 }
10733 }
10734
10735 MVT ContainerInVT = InVT;
10736 if (InVT.isFixedLengthVector()) {
10737 ContainerInVT = getContainerForFixedLengthVector(InVT);
10738 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10739 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10740 }
10741 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10742
10743 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10744
10745 SDValue Res;
10746 if (Opc == ISD::STRICT_FSETCC &&
10747 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10748 CCVal == ISD::SETOLE)) {
10749 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that is
10750 // only active when both input elements are ordered.
10751 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10752 SDValue OrderMask1 = DAG.getNode(
10753 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10754 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10755 True, VL});
10756 SDValue OrderMask2 = DAG.getNode(
10757 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10758 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10759 True, VL});
10760 Mask =
10761 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10762 // Use Mask as the passthru operand to let the result be 0 if either of the
10763 // inputs is unordered.
10764 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10765 DAG.getVTList(MaskVT, MVT::Other),
10766 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10767 } else {
10768 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10769 : RISCVISD::STRICT_FSETCCS_VL;
10770 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10771 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10772 }
10773
10774 if (VT.isFixedLengthVector()) {
10775 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10776 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10777 }
10778 return Res;
10779}
10780
10781// Lower vector ABS to smax(X, sub(0, X)).
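// For illustration: an element holding -3 becomes smax(-3, 0 - (-3)) = 3; the
// most negative value wraps to itself.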
10782SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10783 SDLoc DL(Op);
10784 MVT VT = Op.getSimpleValueType();
10785 SDValue X = Op.getOperand(0);
10786
10787 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10788 "Unexpected type for ISD::ABS");
10789
10790 MVT ContainerVT = VT;
10791 if (VT.isFixedLengthVector()) {
10792 ContainerVT = getContainerForFixedLengthVector(VT);
10793 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10794 }
10795
10796 SDValue Mask, VL;
10797 if (Op->getOpcode() == ISD::VP_ABS) {
10798 Mask = Op->getOperand(1);
10799 if (VT.isFixedLengthVector())
10800 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10801 Subtarget);
10802 VL = Op->getOperand(2);
10803 } else
10804 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10805
10806 SDValue SplatZero = DAG.getNode(
10807 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10808 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10809 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10810 DAG.getUNDEF(ContainerVT), Mask, VL);
10811 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10812 DAG.getUNDEF(ContainerVT), Mask, VL);
10813
10814 if (VT.isFixedLengthVector())
10815 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10816 return Max;
10817}
10818
10819SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10820 SDValue Op, SelectionDAG &DAG) const {
10821 SDLoc DL(Op);
10822 MVT VT = Op.getSimpleValueType();
10823 SDValue Mag = Op.getOperand(0);
10824 SDValue Sign = Op.getOperand(1);
10825 assert(Mag.getValueType() == Sign.getValueType() &&
10826 "Can only handle COPYSIGN with matching types.");
10827
10828 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10829 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10830 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10831
10832 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10833
10834 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10835 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10836
10837 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10838}
10839
10840SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10841 SDValue Op, SelectionDAG &DAG) const {
10842 MVT VT = Op.getSimpleValueType();
10843 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10844
10845 MVT I1ContainerVT =
10846 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10847
10848 SDValue CC =
10849 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10850 SDValue Op1 =
10851 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10852 SDValue Op2 =
10853 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10854
10855 SDLoc DL(Op);
10856 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10857
10858 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10859 Op2, DAG.getUNDEF(ContainerVT), VL);
10860
10861 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10862}
10863
10864SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10865 SelectionDAG &DAG) const {
10866 unsigned NewOpc = getRISCVVLOp(Op);
10867 bool HasPassthruOp = hasPassthruOp(NewOpc);
10868 bool HasMask = hasMaskOp(NewOpc);
10869
10870 MVT VT = Op.getSimpleValueType();
10871 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10872
10873 // Create list of operands by converting existing ones to scalable types.
10874 SmallVector<SDValue, 6> Ops;
10875 for (const SDValue &V : Op->op_values()) {
10876 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10877
10878 // Pass through non-vector operands.
10879 if (!V.getValueType().isVector()) {
10880 Ops.push_back(V);
10881 continue;
10882 }
10883
10884 // "cast" fixed length vector to a scalable vector.
10885 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10886 "Only fixed length vectors are supported!");
10887 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10888 }
10889
10890 SDLoc DL(Op);
10891 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10892 if (HasPassthruOp)
10893 Ops.push_back(DAG.getUNDEF(ContainerVT));
10894 if (HasMask)
10895 Ops.push_back(Mask);
10896 Ops.push_back(VL);
10897
10898 // StrictFP operations have two result values. Their lowered result should
10899 // have the same result count.
10900 if (Op->isStrictFPOpcode()) {
10901 SDValue ScalableRes =
10902 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10903 Op->getFlags());
10904 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10905 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10906 }
10907
10908 SDValue ScalableRes =
10909 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10910 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10911}
10912
10913// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10914// * Operands of each node are assumed to be in the same order.
10915// * The EVL operand is promoted from i32 to i64 on RV64.
10916// * Fixed-length vectors are converted to their scalable-vector container
10917// types.
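// For illustration (assuming the usual nxv2i32 container): a fixed-length
// vp.add(x, y, mask, evl) on v4i32 becomes ADD_VL(x', y', undef passthru,
// mask', evl) on nxv2i32, and the result is converted back to v4i32.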
10918SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10919 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10920 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
10921
10922 SDLoc DL(Op);
10923 MVT VT = Op.getSimpleValueType();
10924 SmallVector<SDValue, 16> Ops;
10925
10926 MVT ContainerVT = VT;
10927 if (VT.isFixedLengthVector())
10928 ContainerVT = getContainerForFixedLengthVector(VT);
10929
10930 for (const auto &OpIdx : enumerate(Op->ops())) {
10931 SDValue V = OpIdx.value();
10932 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10933 // Add dummy passthru value before the mask. Or if there isn't a mask,
10934 // before EVL.
10935 if (HasPassthruOp) {
10936 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10937 if (MaskIdx) {
10938 if (*MaskIdx == OpIdx.index())
10939 Ops.push_back(DAG.getUNDEF(ContainerVT));
10940 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10941 OpIdx.index()) {
10942 if (Op.getOpcode() == ISD::VP_MERGE) {
10943 // For VP_MERGE, copy the false operand instead of an undef value.
10944 Ops.push_back(Ops.back());
10945 } else {
10946 assert(Op.getOpcode() == ISD::VP_SELECT);
10947 // For VP_SELECT, add an undef value.
10948 Ops.push_back(DAG.getUNDEF(ContainerVT));
10949 }
10950 }
10951 }
10952 // Pass through operands which aren't fixed-length vectors.
10953 if (!V.getValueType().isFixedLengthVector()) {
10954 Ops.push_back(V);
10955 continue;
10956 }
10957 // "cast" fixed length vector to a scalable vector.
10958 MVT OpVT = V.getSimpleValueType();
10959 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10960 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10961 "Only fixed length vectors are supported!");
10962 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10963 }
10964
10965 if (!VT.isFixedLengthVector())
10966 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10967
10968 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10969
10970 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10971}
10972
10973SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10974 SelectionDAG &DAG) const {
10975 SDLoc DL(Op);
10976 MVT VT = Op.getSimpleValueType();
10977
10978 SDValue Src = Op.getOperand(0);
10979 // NOTE: Mask is dropped.
10980 SDValue VL = Op.getOperand(2);
10981
10982 MVT ContainerVT = VT;
10983 if (VT.isFixedLengthVector()) {
10984 ContainerVT = getContainerForFixedLengthVector(VT);
10985 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10986 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10987 }
10988
10989 MVT XLenVT = Subtarget.getXLenVT();
10990 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10991 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10992 DAG.getUNDEF(ContainerVT), Zero, VL);
10993
10994 SDValue SplatValue = DAG.getSignedConstant(
10995 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10996 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10997 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10998
10999 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11000 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11001 if (!VT.isFixedLengthVector())
11002 return Result;
11003 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11004}
11005
11006SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11007 SelectionDAG &DAG) const {
11008 SDLoc DL(Op);
11009 MVT VT = Op.getSimpleValueType();
11010
11011 SDValue Op1 = Op.getOperand(0);
11012 SDValue Op2 = Op.getOperand(1);
11013 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11014 // NOTE: Mask is dropped.
11015 SDValue VL = Op.getOperand(4);
11016
11017 MVT ContainerVT = VT;
11018 if (VT.isFixedLengthVector()) {
11019 ContainerVT = getContainerForFixedLengthVector(VT);
11020 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11021 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11022 }
11023
11024 SDValue Result;
11025 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11026
11027 switch (Condition) {
11028 default:
11029 break;
11030 // X != Y --> (X^Y)
11031 case ISD::SETNE:
11032 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11033 break;
11034 // X == Y --> ~(X^Y)
11035 case ISD::SETEQ: {
11036 SDValue Temp =
11037 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11038 Result =
11039 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11040 break;
11041 }
11042 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11043 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11044 case ISD::SETGT:
11045 case ISD::SETULT: {
11046 SDValue Temp =
11047 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11048 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11049 break;
11050 }
11051 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11052 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11053 case ISD::SETLT:
11054 case ISD::SETUGT: {
11055 SDValue Temp =
11056 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11057 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11058 break;
11059 }
11060 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11061 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11062 case ISD::SETGE:
11063 case ISD::SETULE: {
11064 SDValue Temp =
11065 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11066 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11067 break;
11068 }
11069 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11070 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11071 case ISD::SETLE:
11072 case ISD::SETUGE: {
11073 SDValue Temp =
11074 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11075 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11076 break;
11077 }
11078 }
11079
11080 if (!VT.isFixedLengthVector())
11081 return Result;
11082 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11083}
11084
11085// Lower Floating-Point/Integer Type-Convert VP SDNodes
11086SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11087 SelectionDAG &DAG) const {
11088 SDLoc DL(Op);
11089
11090 SDValue Src = Op.getOperand(0);
11091 SDValue Mask = Op.getOperand(1);
11092 SDValue VL = Op.getOperand(2);
11093 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11094
11095 MVT DstVT = Op.getSimpleValueType();
11096 MVT SrcVT = Src.getSimpleValueType();
11097 if (DstVT.isFixedLengthVector()) {
11098 DstVT = getContainerForFixedLengthVector(DstVT);
11099 SrcVT = getContainerForFixedLengthVector(SrcVT);
11100 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11101 MVT MaskVT = getMaskTypeFor(DstVT);
11102 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11103 }
11104
11105 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11106 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11107
11108 SDValue Result;
11109 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11110 if (SrcVT.isInteger()) {
11111 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11112
11113 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11114 ? RISCVISD::VSEXT_VL
11115 : RISCVISD::VZEXT_VL;
11116
11117 // Do we need to do any pre-widening before converting?
11118 if (SrcEltSize == 1) {
11119 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11120 MVT XLenVT = Subtarget.getXLenVT();
11121 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11122 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11123 DAG.getUNDEF(IntVT), Zero, VL);
11124 SDValue One = DAG.getSignedConstant(
11125 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11126 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11127 DAG.getUNDEF(IntVT), One, VL);
11128 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11129 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11130 } else if (DstEltSize > (2 * SrcEltSize)) {
11131 // Widen before converting.
11132 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11133 DstVT.getVectorElementCount());
11134 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11135 }
11136
11137 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11138 } else {
11139 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11140 "Wrong input/output vector types");
11141
11142 // Convert f16 to f32 then convert f32 to i64.
11143 if (DstEltSize > (2 * SrcEltSize)) {
11144 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11145 MVT InterimFVT =
11146 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11147 Src =
11148 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11149 }
11150
11151 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11152 }
11153 } else { // Narrowing + Conversion
11154 if (SrcVT.isInteger()) {
11155 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11156 // First do a narrowing convert to an FP type half the size, then round
11157 // the FP type to a small FP type if needed.
11158
11159 MVT InterimFVT = DstVT;
11160 if (SrcEltSize > (2 * DstEltSize)) {
11161 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11162 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11163 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11164 }
11165
11166 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11167
11168 if (InterimFVT != DstVT) {
11169 Src = Result;
11170 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11171 }
11172 } else {
11173 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11174 "Wrong input/output vector types");
11175 // First do a narrowing conversion to an integer half the size, then
11176 // truncate if needed.
11177
11178 if (DstEltSize == 1) {
11179 // First convert to the same size integer, then convert to mask using
11180 // setcc.
11181 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11182 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11183 DstVT.getVectorElementCount());
11184 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11185
11186 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11187 // otherwise the conversion was undefined.
11188 MVT XLenVT = Subtarget.getXLenVT();
11189 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11190 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11191 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11192 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11193 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11194 DAG.getUNDEF(DstVT), Mask, VL});
11195 } else {
11196 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11197 DstVT.getVectorElementCount());
11198
11199 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11200
11201 while (InterimIVT != DstVT) {
11202 SrcEltSize /= 2;
11203 Src = Result;
11204 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11205 DstVT.getVectorElementCount());
11206 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11207 Src, Mask, VL);
11208 }
11209 }
11210 }
11211 }
11212
11213 MVT VT = Op.getSimpleValueType();
11214 if (!VT.isFixedLengthVector())
11215 return Result;
11216 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11217}
11218
11219SDValue
11220RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11221 SelectionDAG &DAG) const {
11222 SDLoc DL(Op);
11223
11224 SDValue Op1 = Op.getOperand(0);
11225 SDValue Op2 = Op.getOperand(1);
11226 SDValue Offset = Op.getOperand(2);
11227 SDValue Mask = Op.getOperand(3);
11228 SDValue EVL1 = Op.getOperand(4);
11229 SDValue EVL2 = Op.getOperand(5);
11230
11231 const MVT XLenVT = Subtarget.getXLenVT();
11232 MVT VT = Op.getSimpleValueType();
11233 MVT ContainerVT = VT;
11234 if (VT.isFixedLengthVector()) {
11235 ContainerVT = getContainerForFixedLengthVector(VT);
11236 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11237 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11238 MVT MaskVT = getMaskTypeFor(ContainerVT);
11239 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11240 }
11241
11242 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
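// The slide operations used below operate on whole elements (SEW >= 8), so i1
// operands are first widened to i8 vectors holding 0/1 values; the spliced
// result is converted back to a mask with the SETCC_VL against zero near the
// end of this function.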
11243 if (IsMaskVector) {
11244 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11245
11246 // Expand input operands
11247 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11248 DAG.getUNDEF(ContainerVT),
11249 DAG.getConstant(1, DL, XLenVT), EVL1);
11250 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11251 DAG.getUNDEF(ContainerVT),
11252 DAG.getConstant(0, DL, XLenVT), EVL1);
11253 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11254 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11255
11256 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11257 DAG.getUNDEF(ContainerVT),
11258 DAG.getConstant(1, DL, XLenVT), EVL2);
11259 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11260 DAG.getUNDEF(ContainerVT),
11261 DAG.getConstant(0, DL, XLenVT), EVL2);
11262 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11263 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11264 }
11265
11266 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11267 SDValue DownOffset, UpOffset;
11268 if (ImmValue >= 0) {
11269 // The operand is a TargetConstant, we need to rebuild it as a regular
11270 // constant.
11271 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11272 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11273 } else {
11274 // The operand is a TargetConstant, we need to rebuild it as a regular
11275 // constant rather than negating the original operand.
11276 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11277 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11278 }
11279
11280 SDValue SlideDown =
11281 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11282 Op1, DownOffset, Mask, UpOffset);
11283 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11284 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11285
11286 if (IsMaskVector) {
11287 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11288 Result = DAG.getNode(
11289 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11290 {Result, DAG.getConstant(0, DL, ContainerVT),
11291 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11292 Mask, EVL2});
11293 }
11294
11295 if (!VT.isFixedLengthVector())
11296 return Result;
11297 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11298}
11299
11300SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
11301 SelectionDAG &DAG) const {
11302 SDLoc DL(Op);
11303 SDValue Val = Op.getOperand(0);
11304 SDValue Mask = Op.getOperand(1);
11305 SDValue VL = Op.getOperand(2);
11306 MVT VT = Op.getSimpleValueType();
11307
11308 MVT ContainerVT = VT;
11309 if (VT.isFixedLengthVector()) {
11310 ContainerVT = getContainerForFixedLengthVector(VT);
11311 MVT MaskVT = getMaskTypeFor(ContainerVT);
11312 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11313 }
11314
11315 SDValue Result =
11316 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
11317
11318 if (!VT.isFixedLengthVector())
11319 return Result;
11320 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11321}
11322
11323SDValue
11324RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11325 SelectionDAG &DAG) const {
11326 SDLoc DL(Op);
11327 MVT VT = Op.getSimpleValueType();
11328 MVT XLenVT = Subtarget.getXLenVT();
11329
11330 SDValue Op1 = Op.getOperand(0);
11331 SDValue Mask = Op.getOperand(1);
11332 SDValue EVL = Op.getOperand(2);
11333
11334 MVT ContainerVT = VT;
11335 if (VT.isFixedLengthVector()) {
11336 ContainerVT = getContainerForFixedLengthVector(VT);
11337 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11338 MVT MaskVT = getMaskTypeFor(ContainerVT);
11339 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11340 }
11341
11342 MVT GatherVT = ContainerVT;
11343 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11344 // Check if we are working with mask vectors
11345 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11346 if (IsMaskVector) {
11347 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11348
11349 // Expand input operand
11350 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11351 DAG.getUNDEF(IndicesVT),
11352 DAG.getConstant(1, DL, XLenVT), EVL);
11353 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11354 DAG.getUNDEF(IndicesVT),
11355 DAG.getConstant(0, DL, XLenVT), EVL);
11356 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11357 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11358 }
11359
11360 unsigned EltSize = GatherVT.getScalarSizeInBits();
11361 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11362 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11363 unsigned MaxVLMAX =
11364 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11365
11366 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11367 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11368 // to use vrgatherei16.vv.
11369 // TODO: It's also possible to use vrgatherei16.vv for other types to
11370 // decrease register width for the index calculation.
11371 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11372 if (MaxVLMAX > 256 && EltSize == 8) {
11373 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11374 // Split the vector in half and reverse each half using a full register
11375 // reverse.
11376 // Swap the halves and concatenate them.
11377 // Slide the concatenated result by (VLMax - VL).
11378 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11379 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11380 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11381
11382 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11383 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11384
11385 // Reassemble the low and high pieces reversed.
11386 // NOTE: this Result is unmasked (because we do not need masks for
11387 // shuffles). If in the future this has to change, we can use a SELECT_VL
11388 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11389 SDValue Result =
11390 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11391
11392 // Slide off any elements from past EVL that were reversed into the low
11393 // elements.
11394 unsigned MinElts = GatherVT.getVectorMinNumElements();
11395 SDValue VLMax =
11396 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11397 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11398
11399 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11400 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11401
11402 if (IsMaskVector) {
11403 // Truncate Result back to a mask vector
11404 Result =
11405 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11406 {Result, DAG.getConstant(0, DL, GatherVT),
11407 DAG.getCondCode(ISD::SETNE),
11408 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11409 }
11410
11411 if (!VT.isFixedLengthVector())
11412 return Result;
11413 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11414 }
11415
11416 // Just promote the int type to i16 which will double the LMUL.
11417 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11418 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11419 }
11420
11421 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11422 SDValue VecLen =
11423 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11424 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11425 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11426 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11427 DAG.getUNDEF(IndicesVT), Mask, EVL);
11428 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11429 DAG.getUNDEF(GatherVT), Mask, EVL);
11430
11431 if (IsMaskVector) {
11432 // Truncate Result back to a mask vector
11433 Result = DAG.getNode(
11434 RISCVISD::SETCC_VL, DL, ContainerVT,
11435 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11436 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11437 }
11438
11439 if (!VT.isFixedLengthVector())
11440 return Result;
11441 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11442}
11443
11444SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11445 SelectionDAG &DAG) const {
11446 MVT VT = Op.getSimpleValueType();
11447 if (VT.getVectorElementType() != MVT::i1)
11448 return lowerVPOp(Op, DAG);
11449
11450 // It is safe to drop the mask parameter, as masked-off elements are undef.
11451 SDValue Op1 = Op->getOperand(0);
11452 SDValue Op2 = Op->getOperand(1);
11453 SDValue VL = Op->getOperand(3);
11454
11455 MVT ContainerVT = VT;
11456 const bool IsFixed = VT.isFixedLengthVector();
11457 if (IsFixed) {
11458 ContainerVT = getContainerForFixedLengthVector(VT);
11459 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11460 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11461 }
11462
11463 SDLoc DL(Op);
11464 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11465 if (!IsFixed)
11466 return Val;
11467 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11468}
11469
11470SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11471 SelectionDAG &DAG) const {
11472 SDLoc DL(Op);
11473 MVT XLenVT = Subtarget.getXLenVT();
11474 MVT VT = Op.getSimpleValueType();
11475 MVT ContainerVT = VT;
11476 if (VT.isFixedLengthVector())
11477 ContainerVT = getContainerForFixedLengthVector(VT);
11478
11479 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11480
11481 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11482 // Check if the mask is known to be all ones
11483 SDValue Mask = VPNode->getMask();
11484 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11485
11486 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11487 : Intrinsic::riscv_vlse_mask,
11488 DL, XLenVT);
11489 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11490 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11491 VPNode->getStride()};
11492 if (!IsUnmasked) {
11493 if (VT.isFixedLengthVector()) {
11494 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11495 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11496 }
11497 Ops.push_back(Mask);
11498 }
11499 Ops.push_back(VPNode->getVectorLength());
11500 if (!IsUnmasked) {
11501 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11502 Ops.push_back(Policy);
11503 }
11504
11505 SDValue Result =
11506 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11507 VPNode->getMemoryVT(), VPNode->getMemOperand());
11508 SDValue Chain = Result.getValue(1);
11509
11510 if (VT.isFixedLengthVector())
11511 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11512
11513 return DAG.getMergeValues({Result, Chain}, DL);
11514}
11515
11516SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11517 SelectionDAG &DAG) const {
11518 SDLoc DL(Op);
11519 MVT XLenVT = Subtarget.getXLenVT();
11520
11521 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11522 SDValue StoreVal = VPNode->getValue();
11523 MVT VT = StoreVal.getSimpleValueType();
11524 MVT ContainerVT = VT;
11525 if (VT.isFixedLengthVector()) {
11526 ContainerVT = getContainerForFixedLengthVector(VT);
11527 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11528 }
11529
11530 // Check if the mask is known to be all ones
11531 SDValue Mask = VPNode->getMask();
11532 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11533
11534 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11535 : Intrinsic::riscv_vsse_mask,
11536 DL, XLenVT);
11537 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11538 VPNode->getBasePtr(), VPNode->getStride()};
11539 if (!IsUnmasked) {
11540 if (VT.isFixedLengthVector()) {
11541 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11542 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11543 }
11544 Ops.push_back(Mask);
11545 }
11546 Ops.push_back(VPNode->getVectorLength());
11547
11548 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11549 Ops, VPNode->getMemoryVT(),
11550 VPNode->getMemOperand());
11551}
11552
11553// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11554// matched to a RVV indexed load. The RVV indexed load instructions only
11555// support the "unsigned unscaled" addressing mode; indices are implicitly
11556// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11557// signed or scaled indexing is extended to the XLEN value type and scaled
11558// accordingly.
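// For example, a gather of i32 elements that arrives with element-scaled
// indices (scale 4) has to be rewritten so that its indices are byte offsets
// no wider than XLEN before it can be matched to vluxei*.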
11559SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11560 SelectionDAG &DAG) const {
11561 SDLoc DL(Op);
11562 MVT VT = Op.getSimpleValueType();
11563
11564 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11565 EVT MemVT = MemSD->getMemoryVT();
11566 MachineMemOperand *MMO = MemSD->getMemOperand();
11567 SDValue Chain = MemSD->getChain();
11568 SDValue BasePtr = MemSD->getBasePtr();
11569
11570 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11571 SDValue Index, Mask, PassThru, VL;
11572
11573 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11574 Index = VPGN->getIndex();
11575 Mask = VPGN->getMask();
11576 PassThru = DAG.getUNDEF(VT);
11577 VL = VPGN->getVectorLength();
11578 // VP doesn't support extending loads.
11579 LoadExtType = ISD::NON_EXTLOAD;
11581 } else {
11581 // Else it must be a MGATHER.
11582 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11583 Index = MGN->getIndex();
11584 Mask = MGN->getMask();
11585 PassThru = MGN->getPassThru();
11586 LoadExtType = MGN->getExtensionType();
11587 }
11588
11589 MVT IndexVT = Index.getSimpleValueType();
11590 MVT XLenVT = Subtarget.getXLenVT();
11591
11593 "Unexpected VTs!");
11594 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11595 // Targets have to explicitly opt-in for extending vector loads.
11596 assert(LoadExtType == ISD::NON_EXTLOAD &&
11597 "Unexpected extending MGATHER/VP_GATHER");
11598
11599 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11600 // the selection of the masked intrinsics doesn't do this for us.
11601 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11602
11603 MVT ContainerVT = VT;
11604 if (VT.isFixedLengthVector()) {
11605 ContainerVT = getContainerForFixedLengthVector(VT);
11606 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11607 ContainerVT.getVectorElementCount());
11608
11609 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11610
11611 if (!IsUnmasked) {
11612 MVT MaskVT = getMaskTypeFor(ContainerVT);
11613 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11614 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11615 }
11616 }
11617
11618 if (!VL)
11619 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11620
11621 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11622 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11623 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11624 }
11625
11626 unsigned IntID =
11627 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11628 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11629 if (IsUnmasked)
11630 Ops.push_back(DAG.getUNDEF(ContainerVT));
11631 else
11632 Ops.push_back(PassThru);
11633 Ops.push_back(BasePtr);
11634 Ops.push_back(Index);
11635 if (!IsUnmasked)
11636 Ops.push_back(Mask);
11637 Ops.push_back(VL);
11638 if (!IsUnmasked)
11639 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11640
11641 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11642 SDValue Result =
11643 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11644 Chain = Result.getValue(1);
11645
11646 if (VT.isFixedLengthVector())
11647 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11648
11649 return DAG.getMergeValues({Result, Chain}, DL);
11650}
11651
11652// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11653// matched to a RVV indexed store. The RVV indexed store instructions only
11654// support the "unsigned unscaled" addressing mode; indices are implicitly
11655// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11656// signed or scaled indexing is extended to the XLEN value type and scaled
11657// accordingly.
11658SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11659 SelectionDAG &DAG) const {
11660 SDLoc DL(Op);
11661 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11662 EVT MemVT = MemSD->getMemoryVT();
11663 MachineMemOperand *MMO = MemSD->getMemOperand();
11664 SDValue Chain = MemSD->getChain();
11665 SDValue BasePtr = MemSD->getBasePtr();
11666
11667 [[maybe_unused]] bool IsTruncatingStore = false;
11668 SDValue Index, Mask, Val, VL;
11669
11670 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11671 Index = VPSN->getIndex();
11672 Mask = VPSN->getMask();
11673 Val = VPSN->getValue();
11674 VL = VPSN->getVectorLength();
11675 // VP doesn't support truncating stores.
11676 IsTruncatingStore = false;
11677 } else {
11678 // Else it must be a MSCATTER.
11679 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11680 Index = MSN->getIndex();
11681 Mask = MSN->getMask();
11682 Val = MSN->getValue();
11683 IsTruncatingStore = MSN->isTruncatingStore();
11684 }
11685
11686 MVT VT = Val.getSimpleValueType();
11687 MVT IndexVT = Index.getSimpleValueType();
11688 MVT XLenVT = Subtarget.getXLenVT();
11689
11691 "Unexpected VTs!");
11692 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11693 // Targets have to explicitly opt-in for extending vector loads and
11694 // truncating vector stores.
11695 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11696
11697 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11698 // the selection of the masked intrinsics doesn't do this for us.
11699 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11700
11701 MVT ContainerVT = VT;
11702 if (VT.isFixedLengthVector()) {
11703 ContainerVT = getContainerForFixedLengthVector(VT);
11704 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11705 ContainerVT.getVectorElementCount());
11706
11707 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11708 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11709
11710 if (!IsUnmasked) {
11711 MVT MaskVT = getMaskTypeFor(ContainerVT);
11712 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11713 }
11714 }
11715
11716 if (!VL)
11717 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11718
11719 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11720 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11721 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11722 }
11723
11724 unsigned IntID =
11725 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11726 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11727 Ops.push_back(Val);
11728 Ops.push_back(BasePtr);
11729 Ops.push_back(Index);
11730 if (!IsUnmasked)
11731 Ops.push_back(Mask);
11732 Ops.push_back(VL);
11733
11734 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11735 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11736}
11737
11738SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11739 SelectionDAG &DAG) const {
11740 const MVT XLenVT = Subtarget.getXLenVT();
11741 SDLoc DL(Op);
11742 SDValue Chain = Op->getOperand(0);
11743 SDValue SysRegNo = DAG.getTargetConstant(
11744 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11745 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11746 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11747
11748 // The encoding used for the rounding mode in RISC-V differs from the one used
11749 // by FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as
11750 // an index into a table consisting of a sequence of 4-bit fields, each holding
11751 // the corresponding FLT_ROUNDS mode.
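// For example, an FRM value of 1 (RTZ) shifts the table right by 1 * 4 bits,
// and the low 3 bits that survive the mask below hold the FLT_ROUNDS code for
// round-toward-zero.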
11752 static const int Table =
11753 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11754 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11755 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11756 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11757 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11758
11759 SDValue Shift =
11760 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11761 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11762 DAG.getConstant(Table, DL, XLenVT), Shift);
11763 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11764 DAG.getConstant(7, DL, XLenVT));
11765
11766 return DAG.getMergeValues({Masked, Chain}, DL);
11767}
11768
11769SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11770 SelectionDAG &DAG) const {
11771 const MVT XLenVT = Subtarget.getXLenVT();
11772 SDLoc DL(Op);
11773 SDValue Chain = Op->getOperand(0);
11774 SDValue RMValue = Op->getOperand(1);
11775 SDValue SysRegNo = DAG.getTargetConstant(
11776 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11777
11778 // The encoding used for the rounding mode in RISC-V differs from the one used
11779 // by FLT_ROUNDS. To convert between them, the C rounding mode is used as an
11780 // index into a table consisting of a sequence of 4-bit fields, each holding
11781 // the corresponding RISC-V mode.
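// This is the inverse of the mapping in lowerGET_ROUNDING above: the FLT_ROUNDS
// value selects a 4-bit field, and that field holds the FRM encoding to write.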
11782 static const unsigned Table =
11783 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11784 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11785 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11786 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11787 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11788
11789 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11790
11791 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11792 DAG.getConstant(2, DL, XLenVT));
11793 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11794 DAG.getConstant(Table, DL, XLenVT), Shift);
11795 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11796 DAG.getConstant(0x7, DL, XLenVT));
11797 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11798 RMValue);
11799}
11800
11801SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11802 SelectionDAG &DAG) const {
11803 MachineFunction &MF = DAG.getMachineFunction();
11804
11805 bool isRISCV64 = Subtarget.is64Bit();
11806 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11807
11808 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11809 return DAG.getFrameIndex(FI, PtrVT);
11810}
11811
11812// Returns the opcode of the target-specific SDNode that implements the 32-bit
11813// form of the given Opcode.
11814static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11815 switch (Opcode) {
11816 default:
11817 llvm_unreachable("Unexpected opcode");
11818 case ISD::SHL:
11819 return RISCVISD::SLLW;
11820 case ISD::SRA:
11821 return RISCVISD::SRAW;
11822 case ISD::SRL:
11823 return RISCVISD::SRLW;
11824 case ISD::SDIV:
11825 return RISCVISD::DIVW;
11826 case ISD::UDIV:
11827 return RISCVISD::DIVUW;
11828 case ISD::UREM:
11829 return RISCVISD::REMUW;
11830 case ISD::ROTL:
11831 return RISCVISD::ROLW;
11832 case ISD::ROTR:
11833 return RISCVISD::RORW;
11834 }
11835}
11836
11837// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11838 // node. Because i8/i16/i32 aren't legal types for RV64, these operations would
11839 // otherwise be promoted to i64, making it difficult to select the
11840 // SLLW/DIVUW/.../*W instructions later on, because the fact that the operation
11841 // was originally of type i8/i16/i32 is lost.
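// For example, an i32 ISD::SRL is rebuilt here as RISCVISD::SRLW on i64 operands
// that are any-extended from the original i32 values, then truncated back to i32.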
11842 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11843 unsigned ExtOpc = ISD::ANY_EXTEND) {
11844 SDLoc DL(N);
11845 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11846 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11847 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11848 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11849 // ReplaceNodeResults requires we maintain the same type for the return value.
11850 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11851}
11852
11853 // Converts the given 32-bit operation to an i64 operation with sign extension
11854 // semantics, so that the number of sign-extension instructions is reduced.
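// For example, an i32 ISD::ADD becomes
// (trunc (sext_inreg (add (anyext X), (anyext Y)), i32)),
// which records that the 64-bit intermediate value is already sign extended.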
11855 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11856 SDLoc DL(N);
11857 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11858 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11859 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11860 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11861 DAG.getValueType(MVT::i32));
11862 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11863}
11864
11865 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11866 SmallVectorImpl<SDValue> &Results,
11867 SelectionDAG &DAG) const {
11868 SDLoc DL(N);
11869 switch (N->getOpcode()) {
11870 default:
11871 llvm_unreachable("Don't know how to custom type legalize this operation!");
11872 case ISD::STRICT_FP_TO_SINT:
11873 case ISD::STRICT_FP_TO_UINT:
11874 case ISD::FP_TO_SINT:
11875 case ISD::FP_TO_UINT: {
11876 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11877 "Unexpected custom legalisation");
11878 bool IsStrict = N->isStrictFPOpcode();
11879 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11880 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11881 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11882 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11883 TargetLowering::TypeSoftenFloat) {
11884 if (!isTypeLegal(Op0.getValueType()))
11885 return;
11886 if (IsStrict) {
11887 SDValue Chain = N->getOperand(0);
11888 // In the absence of Zfh, promote f16 to f32, then convert.
11889 if (Op0.getValueType() == MVT::f16 &&
11890 !Subtarget.hasStdExtZfhOrZhinx()) {
11891 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11892 {Chain, Op0});
11893 Chain = Op0.getValue(1);
11894 }
11895 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11896 : RISCVISD::STRICT_FCVT_WU_RV64;
11897 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11898 SDValue Res = DAG.getNode(
11899 Opc, DL, VTs, Chain, Op0,
11900 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11901 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11902 Results.push_back(Res.getValue(1));
11903 return;
11904 }
11905 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
11906 // convert.
11907 if ((Op0.getValueType() == MVT::f16 &&
11908 !Subtarget.hasStdExtZfhOrZhinx()) ||
11909 Op0.getValueType() == MVT::bf16)
11910 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11911
11912 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11913 SDValue Res =
11914 DAG.getNode(Opc, DL, MVT::i64, Op0,
11915 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11916 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11917 return;
11918 }
11919 // If the FP type needs to be softened, emit a library call using the 'si'
11920 // version. If we left it to default legalization we'd end up with 'di'. If
11921 // the FP type doesn't need to be softened just let generic type
11922 // legalization promote the result type.
11923 RTLIB::Libcall LC;
11924 if (IsSigned)
11925 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11926 else
11927 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11928 MakeLibCallOptions CallOptions;
11929 EVT OpVT = Op0.getValueType();
11930 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11931 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11932 SDValue Result;
11933 std::tie(Result, Chain) =
11934 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11935 Results.push_back(Result);
11936 if (IsStrict)
11937 Results.push_back(Chain);
11938 break;
11939 }
11940 case ISD::LROUND: {
11941 SDValue Op0 = N->getOperand(0);
11942 EVT Op0VT = Op0.getValueType();
11943 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11944 TargetLowering::TypeSoftenFloat) {
11945 if (!isTypeLegal(Op0VT))
11946 return;
11947
11948 // In the absence of Zfh, promote f16 to f32, then convert.
11949 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11950 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11951
11952 SDValue Res =
11953 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11954 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11955 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11956 return;
11957 }
11958 // If the FP type needs to be softened, emit a library call to lround. We'll
11959 // need to truncate the result. We assume any value that doesn't fit in i32
11960 // is allowed to return an unspecified value.
11961 RTLIB::Libcall LC =
11962 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11963 MakeLibCallOptions CallOptions;
11964 EVT OpVT = Op0.getValueType();
11965 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11966 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11967 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11968 Results.push_back(Result);
11969 break;
11970 }
11971 case ISD::READCYCLECOUNTER:
11972 case ISD::READSTEADYCOUNTER: {
11973 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11974 "has custom type legalization on riscv32");
11975
11976 SDValue LoCounter, HiCounter;
11977 MVT XLenVT = Subtarget.getXLenVT();
11978 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11979 LoCounter = DAG.getTargetConstant(
11980 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11981 HiCounter = DAG.getTargetConstant(
11982 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11983 } else {
11984 LoCounter = DAG.getTargetConstant(
11985 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11986 HiCounter = DAG.getTargetConstant(
11987 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11988 }
11989 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11990 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
11991 N->getOperand(0), LoCounter, HiCounter);
11992
11993 Results.push_back(
11994 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11995 Results.push_back(RCW.getValue(2));
11996 break;
11997 }
11998 case ISD::LOAD: {
11999 if (!ISD::isNON_EXTLoad(N))
12000 return;
12001
12002 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12003 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12004 LoadSDNode *Ld = cast<LoadSDNode>(N);
12005
12006 SDLoc dl(N);
12007 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12008 Ld->getBasePtr(), Ld->getMemoryVT(),
12009 Ld->getMemOperand());
12010 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12011 Results.push_back(Res.getValue(1));
12012 return;
12013 }
12014 case ISD::MUL: {
12015 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12016 unsigned XLen = Subtarget.getXLen();
12017 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12018 if (Size > XLen) {
12019 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12020 SDValue LHS = N->getOperand(0);
12021 SDValue RHS = N->getOperand(1);
12022 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12023
12024 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12025 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12026 // We need exactly one side to be unsigned.
12027 if (LHSIsU == RHSIsU)
12028 return;
12029
12030 auto MakeMULPair = [&](SDValue S, SDValue U) {
12031 MVT XLenVT = Subtarget.getXLenVT();
12032 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12033 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12034 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12035 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12036 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12037 };
12038
12039 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12040 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12041
12042 // The other operand should be signed, but still prefer MULH when
12043 // possible.
12044 if (RHSIsU && LHSIsS && !RHSIsS)
12045 Results.push_back(MakeMULPair(LHS, RHS));
12046 else if (LHSIsU && RHSIsS && !LHSIsS)
12047 Results.push_back(MakeMULPair(RHS, LHS));
12048
12049 return;
12050 }
12051 [[fallthrough]];
12052 }
12053 case ISD::ADD:
12054 case ISD::SUB:
12055 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12056 "Unexpected custom legalisation");
12057 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12058 break;
12059 case ISD::SHL:
12060 case ISD::SRA:
12061 case ISD::SRL:
12062 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12063 "Unexpected custom legalisation");
12064 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12065 // If we can use a BSET instruction, allow default promotion to apply.
12066 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12067 isOneConstant(N->getOperand(0)))
12068 break;
12069 Results.push_back(customLegalizeToWOp(N, DAG));
12070 break;
12071 }
12072
12073 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12074 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12075 // shift amount.
12076 if (N->getOpcode() == ISD::SHL) {
12077 SDLoc DL(N);
12078 SDValue NewOp0 =
12079 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12080 SDValue NewOp1 =
12081 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12082 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12083 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12084 DAG.getValueType(MVT::i32));
12085 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12086 }
12087
12088 break;
12089 case ISD::ROTL:
12090 case ISD::ROTR:
12091 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12092 "Unexpected custom legalisation");
12093 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12094 Subtarget.hasVendorXTHeadBb()) &&
12095 "Unexpected custom legalization");
12096 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12097 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12098 return;
12099 Results.push_back(customLegalizeToWOp(N, DAG));
12100 break;
12101 case ISD::CTTZ:
12102 case ISD::CTTZ_ZERO_UNDEF:
12103 case ISD::CTLZ:
12104 case ISD::CTLZ_ZERO_UNDEF: {
12105 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12106 "Unexpected custom legalisation");
12107
12108 SDValue NewOp0 =
12109 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12110 bool IsCTZ =
12111 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12112 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12113 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12114 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12115 return;
12116 }
12117 case ISD::SDIV:
12118 case ISD::UDIV:
12119 case ISD::UREM: {
12120 MVT VT = N->getSimpleValueType(0);
12121 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12122 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12123 "Unexpected custom legalisation");
12124 // Don't promote division/remainder by constant since we should expand those
12125 // to a multiply by a magic constant.
12126 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12127 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12128 !isIntDivCheap(N->getValueType(0), Attr))
12129 return;
12130
12131 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12132 // the upper 32 bits. For other types we need to sign or zero extend
12133 // based on the opcode.
12134 unsigned ExtOpc = ISD::ANY_EXTEND;
12135 if (VT != MVT::i32)
12136 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12137 : ISD::ZERO_EXTEND;
12138
12139 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12140 break;
12141 }
12142 case ISD::SADDO: {
12143 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12144 "Unexpected custom legalisation");
12145
12146 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12147 // use the default legalization.
12148 if (!isa<ConstantSDNode>(N->getOperand(1)))
12149 return;
12150
12151 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12152 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12153 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12154 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12155 DAG.getValueType(MVT::i32));
12156
12157 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12158
12159 // For an addition, the result should be less than one of the operands (LHS)
12160 // if and only if the other operand (RHS) is negative, otherwise there will
12161 // be overflow.
12162 // For a subtraction, the result should be less than one of the operands
12163 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12164 // otherwise there will be overflow.
12165 EVT OType = N->getValueType(1);
12166 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12167 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12168
12169 SDValue Overflow =
12170 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12171 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12172 Results.push_back(Overflow);
12173 return;
12174 }
12175 case ISD::UADDO:
12176 case ISD::USUBO: {
12177 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12178 "Unexpected custom legalisation");
12179 bool IsAdd = N->getOpcode() == ISD::UADDO;
12180 // Create an ADDW or SUBW.
12181 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12182 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12183 SDValue Res =
12184 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12185 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12186 DAG.getValueType(MVT::i32));
12187
12188 SDValue Overflow;
12189 if (IsAdd && isOneConstant(RHS)) {
12190 // Special case uaddo X, 1 overflowed if the addition result is 0.
12191 // The general case (X + C) < C is not necessarily beneficial. Although we
12192 // reduce the live range of X, we may introduce the materialization of
12193 // constant C, especially when the setcc result is used by a branch. RISC-V
12194 // has no branch instructions that compare against an immediate.
12195 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12196 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12197 } else if (IsAdd && isAllOnesConstant(RHS)) {
12198 // Special case uaddo X, -1 overflowed if X != 0.
12199 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12200 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12201 } else {
12202 // Sign extend the LHS and perform an unsigned compare with the ADDW
12203 // result. Since the inputs are sign extended from i32, this is equivalent
12204 // to comparing the lower 32 bits.
12205 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12206 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12207 IsAdd ? ISD::SETULT : ISD::SETUGT);
12208 }
12209
12210 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12211 Results.push_back(Overflow);
12212 return;
12213 }
12214 case ISD::UADDSAT:
12215 case ISD::USUBSAT: {
12216 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12217 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
12218 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12219 // promotion for UADDO/USUBO.
12220 Results.push_back(expandAddSubSat(N, DAG));
12221 return;
12222 }
12223 case ISD::SADDSAT:
12224 case ISD::SSUBSAT: {
12225 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12226 "Unexpected custom legalisation");
12227 Results.push_back(expandAddSubSat(N, DAG));
12228 return;
12229 }
12230 case ISD::ABS: {
12231 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12232 "Unexpected custom legalisation");
12233
12234 if (Subtarget.hasStdExtZbb()) {
12235 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12236 // This allows us to remember that the result is sign extended. Expanding
12237 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12238 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12239 N->getOperand(0));
12240 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12241 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12242 return;
12243 }
12244
12245 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12246 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12247
12248 // Freeze the source so we can increase its use count.
12249 Src = DAG.getFreeze(Src);
12250
12251 // Copy sign bit to all bits using the sraiw pattern.
12252 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12253 DAG.getValueType(MVT::i32));
12254 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12255 DAG.getConstant(31, DL, MVT::i64));
12256
12257 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12258 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12259
12260 // NOTE: The result is only required to be anyextended, but sext is
12261 // consistent with type legalization of sub.
12262 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12263 DAG.getValueType(MVT::i32));
12264 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12265 return;
12266 }
12267 case ISD::BITCAST: {
12268 EVT VT = N->getValueType(0);
12269 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12270 SDValue Op0 = N->getOperand(0);
12271 EVT Op0VT = Op0.getValueType();
12272 MVT XLenVT = Subtarget.getXLenVT();
12273 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12274 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12275 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12276 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12277 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12278 Subtarget.hasStdExtZfbfmin()) {
12279 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12280 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12281 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12282 Subtarget.hasStdExtFOrZfinx()) {
12283 SDValue FPConv =
12284 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12285 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12286 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12287 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12288 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12289 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12290 NewReg.getValue(0), NewReg.getValue(1));
12291 Results.push_back(RetReg);
12292 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12293 isTypeLegal(Op0VT)) {
12294 // Custom-legalize bitcasts from fixed-length vector types to illegal
12295 // scalar types in order to improve codegen. Bitcast the vector to a
12296 // one-element vector type whose element type is the same as the result
12297 // type, and extract the first element.
12298 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12299 if (isTypeLegal(BVT)) {
12300 SDValue BVec = DAG.getBitcast(BVT, Op0);
12301 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12302 DAG.getVectorIdxConstant(0, DL)));
12303 }
12304 }
12305 break;
12306 }
12307 case RISCVISD::BREV8:
12308 case RISCVISD::ORC_B: {
12309 MVT VT = N->getSimpleValueType(0);
12310 MVT XLenVT = Subtarget.getXLenVT();
12311 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12312 "Unexpected custom legalisation");
12313 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
12314 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
12315 "Unexpected extension");
12316 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12317 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12318 // ReplaceNodeResults requires we maintain the same type for the return
12319 // value.
12320 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12321 break;
12322 }
12323 case ISD::EXTRACT_VECTOR_ELT: {
12324 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12325 // type is illegal (currently only vXi64 RV32).
12326 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12327 // transferred to the destination register. We issue two of these from the
12328 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12329 // first element.
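// In effect, a vXi64 extract on RV32 becomes two scalar reads: one of the low
// 32 bits and one of the element shifted right by 32, recombined with a
// BUILD_PAIR at the end of this case.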
12330 SDValue Vec = N->getOperand(0);
12331 SDValue Idx = N->getOperand(1);
12332
12333 // The vector type hasn't been legalized yet so we can't issue target
12334 // specific nodes if it needs legalization.
12335 // FIXME: We would manually legalize if it's important.
12336 if (!isTypeLegal(Vec.getValueType()))
12337 return;
12338
12339 MVT VecVT = Vec.getSimpleValueType();
12340
12341 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12342 VecVT.getVectorElementType() == MVT::i64 &&
12343 "Unexpected EXTRACT_VECTOR_ELT legalization");
12344
12345 // If this is a fixed vector, we need to convert it to a scalable vector.
12346 MVT ContainerVT = VecVT;
12347 if (VecVT.isFixedLengthVector()) {
12348 ContainerVT = getContainerForFixedLengthVector(VecVT);
12349 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12350 }
12351
12352 MVT XLenVT = Subtarget.getXLenVT();
12353
12354 // Use a VL of 1 to avoid processing more elements than we need.
12355 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12356
12357 // Unless the index is known to be 0, we must slide the vector down to get
12358 // the desired element into index 0.
12359 if (!isNullConstant(Idx)) {
12360 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12361 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12362 }
12363
12364 // Extract the lower XLEN bits of the correct vector element.
12365 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12366
12367 // To extract the upper XLEN bits of the vector element, shift the first
12368 // element right by 32 bits and re-extract the lower XLEN bits.
12369 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12370 DAG.getUNDEF(ContainerVT),
12371 DAG.getConstant(32, DL, XLenVT), VL);
12372 SDValue LShr32 =
12373 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12374 DAG.getUNDEF(ContainerVT), Mask, VL);
12375
12376 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12377
12378 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12379 break;
12380 }
12381 case ISD::INTRINSIC_WO_CHAIN: {
12382 unsigned IntNo = N->getConstantOperandVal(0);
12383 switch (IntNo) {
12384 default:
12386 "Don't know how to custom type legalize this intrinsic!");
12387 case Intrinsic::experimental_get_vector_length: {
12388 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12389 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12390 return;
12391 }
12392 case Intrinsic::experimental_cttz_elts: {
12393 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12394 Results.push_back(
12395 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12396 return;
12397 }
12398 case Intrinsic::riscv_orc_b:
12399 case Intrinsic::riscv_brev8:
12400 case Intrinsic::riscv_sha256sig0:
12401 case Intrinsic::riscv_sha256sig1:
12402 case Intrinsic::riscv_sha256sum0:
12403 case Intrinsic::riscv_sha256sum1:
12404 case Intrinsic::riscv_sm3p0:
12405 case Intrinsic::riscv_sm3p1: {
12406 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12407 return;
12408 unsigned Opc;
12409 switch (IntNo) {
12410 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12411 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12412 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12413 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12414 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12415 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12416 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12417 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12418 }
12419
12420 SDValue NewOp =
12421 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12422 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12423 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12424 return;
12425 }
12426 case Intrinsic::riscv_sm4ks:
12427 case Intrinsic::riscv_sm4ed: {
12428 unsigned Opc =
12429 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12430 SDValue NewOp0 =
12431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12432 SDValue NewOp1 =
12433 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12434 SDValue Res =
12435 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12436 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12437 return;
12438 }
12439 case Intrinsic::riscv_mopr: {
12440 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12441 return;
12442 SDValue NewOp =
12443 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12444 SDValue Res = DAG.getNode(
12445 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12446 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12447 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12448 return;
12449 }
12450 case Intrinsic::riscv_moprr: {
12451 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12452 return;
12453 SDValue NewOp0 =
12454 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12455 SDValue NewOp1 =
12456 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12457 SDValue Res = DAG.getNode(
12458 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12459 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12460 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12461 return;
12462 }
12463 case Intrinsic::riscv_clmul: {
12464 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12465 return;
12466
12467 SDValue NewOp0 =
12468 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12469 SDValue NewOp1 =
12470 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12471 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12472 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12473 return;
12474 }
12475 case Intrinsic::riscv_clmulh:
12476 case Intrinsic::riscv_clmulr: {
12477 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12478 return;
12479
12480 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12481 // to the full 128-bit clmul result of multiplying two xlen values.
12482 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12483 // upper 32 bits.
12484 //
12485 // The alternative is to mask the inputs to 32 bits and use clmul, but
12486 // that requires two shifts to mask each input without zext.w.
12487 // FIXME: If the inputs are known zero extended or could be freely
12488 // zero extended, the mask form would be better.
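// For clmulh, for example, bits [127:64] of the carry-less product of
// (a << 32) and (b << 32) are exactly clmul(a, b), so clmulh of the shifted
// operands yields the full 64-bit product and the final srl by 32 leaves its
// upper 32 bits, which is the desired 32-bit clmulh result.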
12489 SDValue NewOp0 =
12490 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12491 SDValue NewOp1 =
12492 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12493 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12494 DAG.getConstant(32, DL, MVT::i64));
12495 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12496 DAG.getConstant(32, DL, MVT::i64));
12497 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12498 : RISCVISD::CLMULR;
12499 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12500 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12501 DAG.getConstant(32, DL, MVT::i64));
12502 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12503 return;
12504 }
12505 case Intrinsic::riscv_vmv_x_s: {
12506 EVT VT = N->getValueType(0);
12507 MVT XLenVT = Subtarget.getXLenVT();
12508 if (VT.bitsLT(XLenVT)) {
12509 // Simple case just extract using vmv.x.s and truncate.
12510 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12511 Subtarget.getXLenVT(), N->getOperand(1));
12512 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12513 return;
12514 }
12515
12516 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12517 "Unexpected custom legalization");
12518
12519 // We need to do the move in two steps.
12520 SDValue Vec = N->getOperand(1);
12521 MVT VecVT = Vec.getSimpleValueType();
12522
12523 // First extract the lower XLEN bits of the element.
12524 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12525
12526 // To extract the upper XLEN bits of the vector element, shift the first
12527 // element right by 32 bits and re-extract the lower XLEN bits.
12528 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12529
12530 SDValue ThirtyTwoV =
12531 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12532 DAG.getConstant(32, DL, XLenVT), VL);
12533 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12534 DAG.getUNDEF(VecVT), Mask, VL);
12535 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12536
12537 Results.push_back(
12538 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12539 break;
12540 }
12541 }
12542 break;
12543 }
12544 case ISD::VECREDUCE_ADD:
12545 case ISD::VECREDUCE_AND:
12546 case ISD::VECREDUCE_OR:
12547 case ISD::VECREDUCE_XOR:
12548 case ISD::VECREDUCE_SMAX:
12549 case ISD::VECREDUCE_UMAX:
12550 case ISD::VECREDUCE_SMIN:
12551 case ISD::VECREDUCE_UMIN:
12552 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12553 Results.push_back(V);
12554 break;
12555 case ISD::VP_REDUCE_ADD:
12556 case ISD::VP_REDUCE_AND:
12557 case ISD::VP_REDUCE_OR:
12558 case ISD::VP_REDUCE_XOR:
12559 case ISD::VP_REDUCE_SMAX:
12560 case ISD::VP_REDUCE_UMAX:
12561 case ISD::VP_REDUCE_SMIN:
12562 case ISD::VP_REDUCE_UMIN:
12563 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12564 Results.push_back(V);
12565 break;
12566 case ISD::GET_ROUNDING: {
12567 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12568 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12569 Results.push_back(Res.getValue(0));
12570 Results.push_back(Res.getValue(1));
12571 break;
12572 }
12573 }
12574}
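// A minimal standalone check, not from this file, of the identity that the
// riscv_clmulh/riscv_clmulr legalization in ReplaceNodeResults above relies
// on: for 32-bit inputs, shifting both operands left by 32, performing the
// 64-bit clmulh, and then shifting right by 32 produces the 32-bit clmulh.
// The RefClmul* helpers are reference models assumed only for this sketch.
#include <cstdint>

constexpr uint64_t RefClmul(uint64_t A, uint64_t B) {
  // Low 64 bits of the 128-bit carry-less product.
  uint64_t R = 0;
  for (unsigned I = 0; I < 64; ++I)
    if ((B >> I) & 1)
      R ^= A << I;
  return R;
}

constexpr uint64_t RefClmulh(uint64_t A, uint64_t B) {
  // High 64 bits of the 128-bit carry-less product.
  uint64_t R = 0;
  for (unsigned I = 1; I < 64; ++I)
    if ((B >> I) & 1)
      R ^= A >> (64 - I);
  return R;
}

constexpr bool CheckClmulhTrick(uint32_t A, uint32_t B) {
  // 32-bit clmulh is the upper half of the 64-bit carry-less product.
  uint32_t Expected = static_cast<uint32_t>(RefClmul(A, B) >> 32);
  uint64_t Widened = RefClmulh(uint64_t(A) << 32, uint64_t(B) << 32) >> 32;
  return static_cast<uint32_t>(Widened) == Expected;
}

static_assert(CheckClmulhTrick(0x12345678u, 0x9abcdef0u), "clmulh widening trick");
static_assert(CheckClmulhTrick(0xffffffffu, 0x80000001u), "clmulh widening trick");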
12575
12576/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12577/// which corresponds to it.
12578static unsigned getVecReduceOpcode(unsigned Opc) {
12579 switch (Opc) {
12580 default:
12581 llvm_unreachable("Unhandled binary to transform reduction");
12582 case ISD::ADD:
12583 return ISD::VECREDUCE_ADD;
12584 case ISD::UMAX:
12585 return ISD::VECREDUCE_UMAX;
12586 case ISD::SMAX:
12587 return ISD::VECREDUCE_SMAX;
12588 case ISD::UMIN:
12589 return ISD::VECREDUCE_UMIN;
12590 case ISD::SMIN:
12591 return ISD::VECREDUCE_SMIN;
12592 case ISD::AND:
12593 return ISD::VECREDUCE_AND;
12594 case ISD::OR:
12595 return ISD::VECREDUCE_OR;
12596 case ISD::XOR:
12597 return ISD::VECREDUCE_XOR;
12598 case ISD::FADD:
12599 // Note: This is the associative form of the generic reduction opcode.
12600 return ISD::VECREDUCE_FADD;
12601 }
12602}
12603
12604/// Perform two related transforms whose purpose is to incrementally recognize
12605/// an explode_vector followed by scalar reduction as a vector reduction node.
12606/// This exists to recover from a deficiency in SLP which can't handle
12607/// forests with multiple roots sharing common nodes. In some cases, one
12608/// of the trees will be vectorized, and the other will remain (unprofitably)
12609/// scalarized.
12610 static SDValue
12611 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12612 const RISCVSubtarget &Subtarget) {
12613
12614 // This transform needs to run before all integer types have been legalized
12615 // to i64 (so that the vector element type matches the add type), and while
12616 // it's safe to introduce odd sized vector types.
12617 if (DAG.NewNodesMustHaveLegalTypes)
12618 return SDValue();
12619
12620 // Without V, this transform isn't useful. We could form the (illegal)
12621 // operations and let them be scalarized again, but there's really no point.
12622 if (!Subtarget.hasVInstructions())
12623 return SDValue();
12624
12625 const SDLoc DL(N);
12626 const EVT VT = N->getValueType(0);
12627 const unsigned Opc = N->getOpcode();
12628
12629 // For FADD, we only handle the case with reassociation allowed. We
12630 // could handle strict reduction order, but at the moment, there's no
12631 // known reason to, and the complexity isn't worth it.
12632 // TODO: Handle fminnum and fmaxnum here
12633 if (!VT.isInteger() &&
12634 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12635 return SDValue();
12636
12637 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12638 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12639 "Inconsistent mappings");
12640 SDValue LHS = N->getOperand(0);
12641 SDValue RHS = N->getOperand(1);
12642
12643 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12644 return SDValue();
12645
12646 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12647 std::swap(LHS, RHS);
12648
12649 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12650 !isa<ConstantSDNode>(RHS.getOperand(1)))
12651 return SDValue();
12652
12653 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12654 SDValue SrcVec = RHS.getOperand(0);
12655 EVT SrcVecVT = SrcVec.getValueType();
12656 assert(SrcVecVT.getVectorElementType() == VT);
12657 if (SrcVecVT.isScalableVector())
12658 return SDValue();
12659
12660 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12661 return SDValue();
12662
12663 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12664 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12665 // root of our reduction tree. TODO: We could extend this to any two
12666 // adjacent aligned constant indices if desired.
12667 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12668 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12669 uint64_t LHSIdx =
12670 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12671 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12672 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12673 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12674 DAG.getVectorIdxConstant(0, DL));
12675 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12676 }
12677 }
12678
12679 // Match (binop (reduce (extract_subvector V, 0),
12680 // (extract_vector_elt V, sizeof(SubVec))))
12681 // into a reduction of one more element from the original vector V.
12682 if (LHS.getOpcode() != ReduceOpc)
12683 return SDValue();
12684
12685 SDValue ReduceVec = LHS.getOperand(0);
12686 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12687 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12688 isNullConstant(ReduceVec.getOperand(1)) &&
12689 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12690 // For illegal types (e.g. 3xi32), most will be combined again into a
12691 // wider (hopefully legal) type. If this is a terminal state, we are
12692 // relying on type legalization here to produce something reasonable
12693 // and this lowering quality could probably be improved. (TODO)
12694 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12695 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12696 DAG.getVectorIdxConstant(0, DL));
12697 auto Flags = ReduceVec->getFlags();
12698 Flags.intersectWith(N->getFlags());
12699 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12700 }
12701
12702 return SDValue();
12703}
12704
12705
12706 // Try to fold (<bop> x, (reduction.<bop> vec, start))
12707 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12708 const RISCVSubtarget &Subtarget) {
12709 auto BinOpToRVVReduce = [](unsigned Opc) {
12710 switch (Opc) {
12711 default:
12712 llvm_unreachable("Unhandled binary to transform reduction");
12713 case ISD::ADD:
12714 return RISCVISD::VECREDUCE_ADD_VL;
12715 case ISD::UMAX:
12716 return RISCVISD::VECREDUCE_UMAX_VL;
12717 case ISD::SMAX:
12718 return RISCVISD::VECREDUCE_SMAX_VL;
12719 case ISD::UMIN:
12720 return RISCVISD::VECREDUCE_UMIN_VL;
12721 case ISD::SMIN:
12722 return RISCVISD::VECREDUCE_SMIN_VL;
12723 case ISD::AND:
12724 return RISCVISD::VECREDUCE_AND_VL;
12725 case ISD::OR:
12726 return RISCVISD::VECREDUCE_OR_VL;
12727 case ISD::XOR:
12728 return RISCVISD::VECREDUCE_XOR_VL;
12729 case ISD::FADD:
12730 return RISCVISD::VECREDUCE_FADD_VL;
12731 case ISD::FMAXNUM:
12732 return RISCVISD::VECREDUCE_FMAX_VL;
12733 case ISD::FMINNUM:
12734 return RISCVISD::VECREDUCE_FMIN_VL;
12735 }
12736 };
12737
12738 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12739 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12740 isNullConstant(V.getOperand(1)) &&
12741 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12742 };
12743
12744 unsigned Opc = N->getOpcode();
12745 unsigned ReduceIdx;
12746 if (IsReduction(N->getOperand(0), Opc))
12747 ReduceIdx = 0;
12748 else if (IsReduction(N->getOperand(1), Opc))
12749 ReduceIdx = 1;
12750 else
12751 return SDValue();
12752
12753 // Skip if FADD disallows reassociation but the combine needs it.
12754 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12755 return SDValue();
12756
12757 SDValue Extract = N->getOperand(ReduceIdx);
12758 SDValue Reduce = Extract.getOperand(0);
12759 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12760 return SDValue();
12761
12762 SDValue ScalarV = Reduce.getOperand(2);
12763 EVT ScalarVT = ScalarV.getValueType();
12764 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12765 ScalarV.getOperand(0)->isUndef() &&
12766 isNullConstant(ScalarV.getOperand(2)))
12767 ScalarV = ScalarV.getOperand(1);
12768
12769 // Make sure that ScalarV is a splat with VL=1.
12770 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12771 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12772 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12773 return SDValue();
12774
12775 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12776 return SDValue();
12777
12778 // Check the scalar of ScalarV is neutral element
12779 // TODO: Deal with value other than neutral element.
12780 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12781 0))
12782 return SDValue();
12783
12784 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12785 // FIXME: We might be able to improve this if operand 0 is undef.
12786 if (!isNonZeroAVL(Reduce.getOperand(5)))
12787 return SDValue();
12788
12789 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12790
12791 SDLoc DL(N);
12792 SDValue NewScalarV =
12793 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12794 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12795
12796 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12797 if (ScalarVT != ScalarV.getValueType())
12798 NewScalarV =
12799 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12800 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12801
12802 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12803 NewScalarV, Reduce.getOperand(3),
12804 Reduce.getOperand(4), Reduce.getOperand(5)};
12805 SDValue NewReduce =
12806 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12807 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12808 Extract.getOperand(1));
12809}
12810
12811// Optimize (add (shl x, c0), (shl y, c1)) ->
12812 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2 or 3.
12813 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12814 const RISCVSubtarget &Subtarget) {
12815 // Perform this optimization only in the zba extension.
12816 if (!Subtarget.hasStdExtZba())
12817 return SDValue();
12818
12819 // Skip for vector types and larger types.
12820 EVT VT = N->getValueType(0);
12821 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12822 return SDValue();
12823
12824 // The two operand nodes must be SHL and have no other use.
12825 SDValue N0 = N->getOperand(0);
12826 SDValue N1 = N->getOperand(1);
12827 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12828 !N0->hasOneUse() || !N1->hasOneUse())
12829 return SDValue();
12830
12831 // Check c0 and c1.
12832 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12833 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12834 if (!N0C || !N1C)
12835 return SDValue();
12836 int64_t C0 = N0C->getSExtValue();
12837 int64_t C1 = N1C->getSExtValue();
12838 if (C0 <= 0 || C1 <= 0)
12839 return SDValue();
12840
12841 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12842 int64_t Bits = std::min(C0, C1);
12843 int64_t Diff = std::abs(C0 - C1);
12844 if (Diff != 1 && Diff != 2 && Diff != 3)
12845 return SDValue();
12846
12847 // Build nodes.
12848 SDLoc DL(N);
12849 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12850 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12851 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
12852 DAG.getConstant(Diff, DL, VT), NS);
12853 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
12854}
12855
12856// Combine a constant select operand into its use:
12857//
12858// (and (select cond, -1, c), x)
12859// -> (select cond, x, (and x, c)) [AllOnes=1]
12860// (or (select cond, 0, c), x)
12861// -> (select cond, x, (or x, c)) [AllOnes=0]
12862// (xor (select cond, 0, c), x)
12863// -> (select cond, x, (xor x, c)) [AllOnes=0]
12864// (add (select cond, 0, c), x)
12865// -> (select cond, x, (add x, c)) [AllOnes=0]
12866// (sub x, (select cond, 0, c))
12867 // -> (select cond, x, (sub x, c)) [AllOnes=0]
12868 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12869 SelectionDAG &DAG, bool AllOnes,
12870 const RISCVSubtarget &Subtarget) {
12871 EVT VT = N->getValueType(0);
12872
12873 // Skip vectors.
12874 if (VT.isVector())
12875 return SDValue();
12876
12877 if (!Subtarget.hasConditionalMoveFusion()) {
12878 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12879 if ((!Subtarget.hasStdExtZicond() &&
12880 !Subtarget.hasVendorXVentanaCondOps()) ||
12881 N->getOpcode() != ISD::AND)
12882 return SDValue();
12883
12884 // Maybe harmful when the condition code has multiple uses.
12885 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12886 return SDValue();
12887
12888 // Maybe harmful when VT is wider than XLen.
12889 if (VT.getSizeInBits() > Subtarget.getXLen())
12890 return SDValue();
12891 }
12892
12893 if ((Slct.getOpcode() != ISD::SELECT &&
12894 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12895 !Slct.hasOneUse())
12896 return SDValue();
12897
12898 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12899 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12900 };
12901
12902 bool SwapSelectOps;
12903 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12904 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12905 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12906 SDValue NonConstantVal;
12907 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12908 SwapSelectOps = false;
12909 NonConstantVal = FalseVal;
12910 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12911 SwapSelectOps = true;
12912 NonConstantVal = TrueVal;
12913 } else
12914 return SDValue();
12915
12916 // Slct is now known to be the desired identity constant when CC is true.
12917 TrueVal = OtherOp;
12918 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12919 // Unless SwapSelectOps says the condition should be false.
12920 if (SwapSelectOps)
12921 std::swap(TrueVal, FalseVal);
12922
12923 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12924 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12925 {Slct.getOperand(0), Slct.getOperand(1),
12926 Slct.getOperand(2), TrueVal, FalseVal});
12927
12928 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12929 {Slct.getOperand(0), TrueVal, FalseVal});
12930}
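// A small standalone check, not from this file, of the identity behind
// combineSelectAndUse for the AllOnes=1 (AND) case documented above:
// (and (select cond, -1, c), x) == (select cond, x, (and x, c)).
#include <cstdint>

constexpr bool CheckSelectAndFold(bool Cond, uint32_t C, uint32_t X) {
  uint32_t Folded = (Cond ? ~0u : C) & X;
  uint32_t Selected = Cond ? X : (X & C);
  return Folded == Selected;
}

static_assert(CheckSelectAndFold(true, 0x0f0f0f0fu, 0x12345678u), "select/and fold");
static_assert(CheckSelectAndFold(false, 0x0f0f0f0fu, 0x12345678u), "select/and fold");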
12931
12932 // Attempt combineSelectAndUse on each operand of a commutative operator N.
12933 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12934 bool AllOnes,
12935 const RISCVSubtarget &Subtarget) {
12936 SDValue N0 = N->getOperand(0);
12937 SDValue N1 = N->getOperand(1);
12938 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12939 return Result;
12940 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12941 return Result;
12942 return SDValue();
12943}
12944
12945// Transform (add (mul x, c0), c1) ->
12946// (add (mul (add x, c1/c0), c0), c1%c0).
12947// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12948// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12949// to an infinite loop in DAGCombine if transformed.
12950// Or transform (add (mul x, c0), c1) ->
12951// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12952// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12953// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12954// lead to an infinite loop in DAGCombine if transformed.
12955// Or transform (add (mul x, c0), c1) ->
12956// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12957// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12958// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12959// lead to an infinite loop in DAGCombine if transformed.
12960// Or transform (add (mul x, c0), c1) ->
12961// (mul (add x, c1/c0), c0).
12962 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
12963 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12964 const RISCVSubtarget &Subtarget) {
12965 // Skip for vector types and larger types.
12966 EVT VT = N->getValueType(0);
12967 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12968 return SDValue();
12969 // The first operand node must be a MUL and has no other use.
12970 SDValue N0 = N->getOperand(0);
12971 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12972 return SDValue();
12973 // Check if c0 and c1 match above conditions.
12974 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12975 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12976 if (!N0C || !N1C)
12977 return SDValue();
12978 // If N0C has multiple uses it's possible one of the cases in
12979 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12980 // in an infinite loop.
12981 if (!N0C->hasOneUse())
12982 return SDValue();
12983 int64_t C0 = N0C->getSExtValue();
12984 int64_t C1 = N1C->getSExtValue();
12985 int64_t CA, CB;
12986 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12987 return SDValue();
12988 // Search for proper CA (non-zero) and CB that both are simm12.
12989 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12990 !isInt<12>(C0 * (C1 / C0))) {
12991 CA = C1 / C0;
12992 CB = C1 % C0;
12993 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12994 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12995 CA = C1 / C0 + 1;
12996 CB = C1 % C0 - C0;
12997 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12998 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12999 CA = C1 / C0 - 1;
13000 CB = C1 % C0 + C0;
13001 } else
13002 return SDValue();
13003 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13004 SDLoc DL(N);
13005 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13006 DAG.getSignedConstant(CA, DL, VT));
13007 SDValue New1 =
13008 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
13009 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
13010}
13011
13012// add (zext, zext) -> zext (add (zext, zext))
13013// sub (zext, zext) -> sext (sub (zext, zext))
13014// mul (zext, zext) -> zext (mul (zext, zext))
13015// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13016// udiv (zext, zext) -> zext (udiv (zext, zext))
13017// srem (zext, zext) -> zext (srem (zext, zext))
13018// urem (zext, zext) -> zext (urem (zext, zext))
13019//
13020 // where the sum of the extend widths matches, and the range of the bin op
13021 // fits inside the width of the narrower bin op. (For profitability on rvv, we
13022 // use a power of two for both inner and outer extend.)
13023 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13024
13025 EVT VT = N->getValueType(0);
13026 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13027 return SDValue();
13028
13029 SDValue N0 = N->getOperand(0);
13030 SDValue N1 = N->getOperand(1);
13031 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13032 return SDValue();
13033 if (!N0.hasOneUse() || !N1.hasOneUse())
13034 return SDValue();
13035
13036 SDValue Src0 = N0.getOperand(0);
13037 SDValue Src1 = N1.getOperand(0);
13038 EVT SrcVT = Src0.getValueType();
13039 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13040 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13041 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13042 return SDValue();
13043
13044 LLVMContext &C = *DAG.getContext();
13045 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13046 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13047
13048 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13049 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13050
13051 // Src0 and Src1 are zero extended, so they're always positive if signed.
13052 //
13053 // sub can produce a negative from two positive operands, so it needs sign
13054 // extended. Other nodes produce a positive from two positive operands, so
13055 // zero extend instead.
13056 unsigned OuterExtend =
13057 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13058
13059 return DAG.getNode(
13060 OuterExtend, SDLoc(N), VT,
13061 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13062}
13063
13064 // Try to turn (add (xor bool, 1) -1) into (neg bool).
13065 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13066 SDValue N0 = N->getOperand(0);
13067 SDValue N1 = N->getOperand(1);
13068 EVT VT = N->getValueType(0);
13069 SDLoc DL(N);
13070
13071 // RHS should be -1.
13072 if (!isAllOnesConstant(N1))
13073 return SDValue();
13074
13075 // Look for (xor X, 1).
13076 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13077 return SDValue();
13078
13079 // First xor input should be 0 or 1.
13080 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13081 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13082 return SDValue();
13083
13084 // Emit a negate of the setcc.
13085 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13086 N0.getOperand(0));
13087}
13088
13089 static SDValue performADDCombine(SDNode *N,
13090 TargetLowering::DAGCombinerInfo &DCI,
13091 const RISCVSubtarget &Subtarget) {
13092 SelectionDAG &DAG = DCI.DAG;
13093 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13094 return V;
13095 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13096 return V;
13097 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
13098 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13099 return V;
13100 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13101 return V;
13102 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13103 return V;
13104 if (SDValue V = combineBinOpOfZExt(N, DAG))
13105 return V;
13106
13107 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13108 // (select lhs, rhs, cc, x, (add x, y))
13109 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13110}
13111
13112 // Try to turn a sub boolean RHS and constant LHS into an addi.
13113 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13114 SDValue N0 = N->getOperand(0);
13115 SDValue N1 = N->getOperand(1);
13116 EVT VT = N->getValueType(0);
13117 SDLoc DL(N);
13118
13119 // Require a constant LHS.
13120 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13121 if (!N0C)
13122 return SDValue();
13123
13124 // All our optimizations involve subtracting 1 from the immediate and forming
13125 // an ADDI. Make sure the new immediate is valid for an ADDI.
13126 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13127 if (!ImmValMinus1.isSignedIntN(12))
13128 return SDValue();
13129
13130 SDValue NewLHS;
13131 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13132 // (sub constant, (setcc x, y, eq/neq)) ->
13133 // (add (setcc x, y, neq/eq), constant - 1)
13134 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13135 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13136 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13137 return SDValue();
13138 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13139 NewLHS =
13140 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13141 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13142 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13143 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13144 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13145 NewLHS = N1.getOperand(0);
13146 } else
13147 return SDValue();
13148
13149 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13150 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13151}
13152
13153// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
13154 // non-zero. Replace with orc.b.
13155 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
13156 const RISCVSubtarget &Subtarget) {
13157 if (!Subtarget.hasStdExtZbb())
13158 return SDValue();
13159
13160 EVT VT = N->getValueType(0);
13161
13162 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
13163 return SDValue();
13164
13165 SDValue N0 = N->getOperand(0);
13166 SDValue N1 = N->getOperand(1);
13167
13168 if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
13169 return SDValue();
13170
13171 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
13172 if (!ShAmtC || ShAmtC->getZExtValue() != 8)
13173 return SDValue();
13174
13175 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
13176 if (!DAG.MaskedValueIsZero(N1, Mask))
13177 return SDValue();
13178
13179 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
13180}
13181
13182 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13183 const RISCVSubtarget &Subtarget) {
13184 if (SDValue V = combineSubOfBoolean(N, DAG))
13185 return V;
13186
13187 EVT VT = N->getValueType(0);
13188 SDValue N0 = N->getOperand(0);
13189 SDValue N1 = N->getOperand(1);
13190 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13191 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13192 isNullConstant(N1.getOperand(1))) {
13193 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13194 if (CCVal == ISD::SETLT) {
13195 SDLoc DL(N);
13196 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13197 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13198 DAG.getConstant(ShAmt, DL, VT));
13199 }
13200 }
13201
13202 if (SDValue V = combineBinOpOfZExt(N, DAG))
13203 return V;
13204 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
13205 return V;
13206
13207 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13208 // (select lhs, rhs, cc, x, (sub x, y))
13209 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13210}
13211
13212// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13213// Legalizing setcc can introduce xors like this. Doing this transform reduces
13214 // the number of xors and may allow the xor to fold into a branch condition.
13215 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13216 SDValue N0 = N->getOperand(0);
13217 SDValue N1 = N->getOperand(1);
13218 bool IsAnd = N->getOpcode() == ISD::AND;
13219
13220 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13221 return SDValue();
13222
13223 if (!N0.hasOneUse() || !N1.hasOneUse())
13224 return SDValue();
13225
13226 SDValue N01 = N0.getOperand(1);
13227 SDValue N11 = N1.getOperand(1);
13228
13229 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13230 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13231 // operation is And, allow one of the Xors to use -1.
13232 if (isOneConstant(N01)) {
13233 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13234 return SDValue();
13235 } else if (isOneConstant(N11)) {
13236 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13237 if (!(IsAnd && isAllOnesConstant(N01)))
13238 return SDValue();
13239 } else
13240 return SDValue();
13241
13242 EVT VT = N->getValueType(0);
13243
13244 SDValue N00 = N0.getOperand(0);
13245 SDValue N10 = N1.getOperand(0);
13246
13247 // The LHS of the xors needs to be 0/1.
13248 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13249 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13250 return SDValue();
13251
13252 // Invert the opcode and insert a new xor.
13253 SDLoc DL(N);
13254 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13255 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13256 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13257}
13258
13259// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
13260// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
13261 // value to an unsigned value. This will be lowered to vmax and a series of
13262 // vnclipu instructions later. This can be extended to truncated types other
13263 // than i8 by replacing 256 and 255 with the equivalent constants for the
13264 // type.
13265 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
13266 EVT VT = N->getValueType(0);
13267 SDValue N0 = N->getOperand(0);
13268 EVT SrcVT = N0.getValueType();
13269
13270 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13271 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
13272 return SDValue();
13273
13274 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
13275 return SDValue();
13276
13277 SDValue Cond = N0.getOperand(0);
13278 SDValue True = N0.getOperand(1);
13279 SDValue False = N0.getOperand(2);
13280
13281 if (Cond.getOpcode() != ISD::SETCC)
13282 return SDValue();
13283
13284 // FIXME: Support the version of this pattern with the select operands
13285 // swapped.
13286 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13287 if (CCVal != ISD::SETULT)
13288 return SDValue();
13289
13290 SDValue CondLHS = Cond.getOperand(0);
13291 SDValue CondRHS = Cond.getOperand(1);
13292
13293 if (CondLHS != True)
13294 return SDValue();
13295
13296 unsigned ScalarBits = VT.getScalarSizeInBits();
13297
13298 // FIXME: Support other constants.
13299 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
13300 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
13301 return SDValue();
13302
13303 if (False.getOpcode() != ISD::SIGN_EXTEND)
13304 return SDValue();
13305
13306 False = False.getOperand(0);
13307
13308 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
13309 return SDValue();
13310
13311 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
13312 if (!FalseRHSC || !FalseRHSC->isZero())
13313 return SDValue();
13314
13315 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
13316 if (CCVal2 != ISD::SETGT)
13317 return SDValue();
13318
13319 // Emit the signed to unsigned saturation pattern.
13320 SDLoc DL(N);
13321 SDValue Max =
13322 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
13323 SDValue Min =
13324 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
13325 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
13326 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
13327}
13328
13329 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13330 const RISCVSubtarget &Subtarget) {
13331 SDValue N0 = N->getOperand(0);
13332 EVT VT = N->getValueType(0);
13333
13334 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13335 // extending X. This is safe since we only need the LSB after the shift and
13336 // shift amounts larger than 31 would produce poison. If we wait until
13337 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13338 // to use a BEXT instruction.
13339 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13340 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13341 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13342 SDLoc DL(N0);
13343 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13344 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13345 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13346 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13347 }
13348
13349 return combineTruncSelectToSMaxUSat(N, DAG);
13350}
13351
13352 // Combines two comparison operations and a logic operation into one
13353 // selection operation (min, max) and a logic operation. Returns the newly
13354 // constructed node if the conditions for the optimization are satisfied.
13355 static SDValue performANDCombine(SDNode *N,
13356 TargetLowering::DAGCombinerInfo &DCI,
13357 const RISCVSubtarget &Subtarget) {
13358 SelectionDAG &DAG = DCI.DAG;
13359
13360 SDValue N0 = N->getOperand(0);
13361 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13362 // extending X. This is safe since we only need the LSB after the shift and
13363 // shift amounts larger than 31 would produce poison. If we wait until
13364 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13365 // to use a BEXT instruction.
13366 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13367 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13368 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13369 N0.hasOneUse()) {
13370 SDLoc DL(N);
13371 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13372 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13373 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13374 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13375 DAG.getConstant(1, DL, MVT::i64));
13376 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13377 }
13378
13379 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13380 return V;
13381 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13382 return V;
13383
13384 if (DCI.isAfterLegalizeDAG())
13385 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13386 return V;
13387
13388 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13389 // (select lhs, rhs, cc, x, (and x, y))
13390 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13391}
13392
13393// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13394 // FIXME: Generalize to other binary operators with the same operand.
13395 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13396 SelectionDAG &DAG) {
13397 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13398
13399 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13400 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13401 !N0.hasOneUse() || !N1.hasOneUse())
13402 return SDValue();
13403
13404 // Should have the same condition.
13405 SDValue Cond = N0.getOperand(1);
13406 if (Cond != N1.getOperand(1))
13407 return SDValue();
13408
13409 SDValue TrueV = N0.getOperand(0);
13410 SDValue FalseV = N1.getOperand(0);
13411
13412 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13413 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13414 !isOneConstant(TrueV.getOperand(1)) ||
13415 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13416 return SDValue();
13417
13418 EVT VT = N->getValueType(0);
13419 SDLoc DL(N);
13420
13421 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13422 Cond);
13423 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13424 Cond);
13425 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13426 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13427}
13428
13429 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13430 const RISCVSubtarget &Subtarget) {
13431 SelectionDAG &DAG = DCI.DAG;
13432
13433 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13434 return V;
13435 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13436 return V;
13437
13438 if (DCI.isAfterLegalizeDAG())
13439 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13440 return V;
13441
13442 // Look for Or of CZERO_EQZ/NEZ with the same condition; this is the select idiom.
13443 // We may be able to pull a common operation out of the true and false value.
13444 SDValue N0 = N->getOperand(0);
13445 SDValue N1 = N->getOperand(1);
13446 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13447 return V;
13448 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13449 return V;
13450
13451 // fold (or (select cond, 0, y), x) ->
13452 // (select cond, x, (or x, y))
13453 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13454}
13455
13456 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13457 const RISCVSubtarget &Subtarget) {
13458 SDValue N0 = N->getOperand(0);
13459 SDValue N1 = N->getOperand(1);
13460
13461 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13462 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13463 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13464 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13465 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13466 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13467 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13468 SDLoc DL(N);
13469 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13470 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13471 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13472 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13473 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13474 }
13475
13476 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13477 // NOTE: Assumes ROL being legal means ROLW is legal.
13478 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13479 if (N0.getOpcode() == RISCVISD::SLLW &&
13480 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13481 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13482 SDLoc DL(N);
13483 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13484 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13485 }
13486
13487 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13488 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13489 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13490 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13491 if (ConstN00 && CC == ISD::SETLT) {
13492 EVT VT = N0.getValueType();
13493 SDLoc DL(N0);
13494 const APInt &Imm = ConstN00->getAPIntValue();
13495 if ((Imm + 1).isSignedIntN(12))
13496 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13497 DAG.getConstant(Imm + 1, DL, VT), CC);
13498 }
13499 }
13500
13501 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13502 return V;
13503 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13504 return V;
13505
13506 // fold (xor (select cond, 0, y), x) ->
13507 // (select cond, x, (xor x, y))
13508 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13509}
13510
13511 // Try to expand a scalar multiply to a faster sequence.
13512 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13513 TargetLowering::DAGCombinerInfo &DCI,
13514 const RISCVSubtarget &Subtarget) {
13515
13516 EVT VT = N->getValueType(0);
13517
13518 // LI + MUL is usually smaller than the alternative sequence.
13519 if (DAG.getMachineFunction().getFunction().hasMinSize())
13520 return SDValue();
13521
13522 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13523 return SDValue();
13524
13525 if (VT != Subtarget.getXLenVT())
13526 return SDValue();
13527
13528 const bool HasShlAdd =
13529 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
13530
13531 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13532 if (!CNode)
13533 return SDValue();
13534 uint64_t MulAmt = CNode->getZExtValue();
13535
13536 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
13537 // We're adding additional uses of X here, and in principle, we should be freezing
13538 // X before doing so. However, adding freeze here causes real regressions, and no
13539 // other target properly freezes X in these cases either.
13540 SDValue X = N->getOperand(0);
13541
13542 if (HasShlAdd) {
13543 for (uint64_t Divisor : {3, 5, 9}) {
13544 if (MulAmt % Divisor != 0)
13545 continue;
13546 uint64_t MulAmt2 = MulAmt / Divisor;
13547 // 3/5/9 * 2^N -> shl (shXadd X, X), N
13548 if (isPowerOf2_64(MulAmt2)) {
13549 SDLoc DL(N);
13550 SDValue X = N->getOperand(0);
13551 // Put the shift first if we can fold a zext into the
13552 // shift forming a slli.uw.
13553 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13554 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13555 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13556 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13557 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13558 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
13559 Shl);
13560 }
13561 // Otherwise, put the shl second so that it can fold with following
13562 // instructions (e.g. sext or add).
13563 SDValue Mul359 =
13564 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13565 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13566 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13567 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13568 }
13569
13570 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13571 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13572 SDLoc DL(N);
13573 SDValue Mul359 =
13574 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13575 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13576 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13577 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13578 Mul359);
13579 }
13580 }
13581
13582 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13583 // shXadd. First check if this is a sum of two powers of 2 because that's
13584 // easy. Then count how many zeros are up to the first bit.
13585 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13586 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13587 if (ScaleShift >= 1 && ScaleShift < 4) {
13588 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13589 SDLoc DL(N);
13590 SDValue Shift1 =
13591 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13592 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13593 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13594 }
13595 }
13596
13597 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13598 // This is the two-instruction form; there are also three-instruction
13599 // variants we could implement, e.g.
13600 // (2^(1,2,3) * 3,5,9 + 1) << C2
13601 // 2^(C1>3) * 3,5,9 +/- 1
13602 for (uint64_t Divisor : {3, 5, 9}) {
13603 uint64_t C = MulAmt - 1;
13604 if (C <= Divisor)
13605 continue;
13606 unsigned TZ = llvm::countr_zero(C);
13607 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13608 SDLoc DL(N);
13609 SDValue Mul359 =
13610 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13611 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13612 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13613 DAG.getConstant(TZ, DL, VT), X);
13614 }
13615 }
13616
13617 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13618 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13619 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13620 if (ScaleShift >= 1 && ScaleShift < 4) {
13621 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13622 SDLoc DL(N);
13623 SDValue Shift1 =
13624 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13625 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13626 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13627 DAG.getConstant(ScaleShift, DL, VT), X));
13628 }
13629 }
13630
13631 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13632 for (uint64_t Offset : {3, 5, 9}) {
13633 if (isPowerOf2_64(MulAmt + Offset)) {
13634 SDLoc DL(N);
13635 SDValue Shift1 =
13636 DAG.getNode(ISD::SHL, DL, VT, X,
13637 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13638 SDValue Mul359 =
13639 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13640 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
13641 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13642 }
13643 }
13644 }
13645
13646 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
13647 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
13648 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
13649 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
13650 SDLoc DL(N);
13651 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13652 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
13653 SDValue Shift2 =
13654 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13655 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
13656 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
13657 }
13658
13659 return SDValue();
13660}
13661
13662// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
13663// (bitcast (sra (v2Xi16 (bitcast X)), 15))
13664 // Same for other equivalent types with other equivalent constants.
13665 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
13666 EVT VT = N->getValueType(0);
13667 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13668
13669 // Do this for legal vectors unless they are i1 or i8 vectors.
13670 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
13671 return SDValue();
13672
13673 if (N->getOperand(0).getOpcode() != ISD::AND ||
13674 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
13675 return SDValue();
13676
13677 SDValue And = N->getOperand(0);
13678 SDValue Srl = And.getOperand(0);
13679
13680 APInt V1, V2, V3;
13681 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
13682 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
13683 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
13684 return SDValue();
13685
13686 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
13687 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
13688 V3 != (HalfSize - 1))
13689 return SDValue();
13690
13691 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
13692 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
13693 VT.getVectorElementCount() * 2);
13694 SDLoc DL(N);
13695 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
13696 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
13697 DAG.getConstant(HalfSize - 1, DL, HalfVT));
13698 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
13699}
13700
13701 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13702 TargetLowering::DAGCombinerInfo &DCI,
13703 const RISCVSubtarget &Subtarget) {
13704 EVT VT = N->getValueType(0);
13705 if (!VT.isVector())
13706 return expandMul(N, DAG, DCI, Subtarget);
13707
13708 SDLoc DL(N);
13709 SDValue N0 = N->getOperand(0);
13710 SDValue N1 = N->getOperand(1);
13711 SDValue MulOper;
13712 unsigned AddSubOpc;
13713
13714 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13715 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13716 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13717 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13718 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13719 AddSubOpc = V->getOpcode();
13720 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13721 SDValue Opnd = V->getOperand(1);
13722 MulOper = V->getOperand(0);
13723 if (AddSubOpc == ISD::SUB)
13724 std::swap(Opnd, MulOper);
13725 if (isOneOrOneSplat(Opnd))
13726 return true;
13727 }
13728 return false;
13729 };
13730
13731 if (IsAddSubWith1(N0)) {
13732 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13733 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13734 }
13735
13736 if (IsAddSubWith1(N1)) {
13737 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13738 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13739 }
13740
13741 if (SDValue V = combineBinOpOfZExt(N, DAG))
13742 return V;
13743
13744 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
13745 return V;
13746
13747 return SDValue();
13748}
13749
13750/// According to the property that indexed load/store instructions zero-extend
13751 /// their indices, try to narrow the type of the index operand.
13752static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13753 if (isIndexTypeSigned(IndexType))
13754 return false;
13755
13756 if (!N->hasOneUse())
13757 return false;
13758
13759 EVT VT = N.getValueType();
13760 SDLoc DL(N);
13761
13762 // In general, what we're doing here is seeing if we can sink a truncate to
13763 // a smaller element type into the expression tree building our index.
13764 // TODO: We can generalize this and handle a bunch more cases if useful.
13765
13766 // Narrow a buildvector to the narrowest element type. This requires less
13767 // work and less register pressure at high LMUL, and creates smaller constants
13768 // which may be cheaper to materialize.
13769 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13770 KnownBits Known = DAG.computeKnownBits(N);
13771 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13772 LLVMContext &C = *DAG.getContext();
13773 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13774 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13775 N = DAG.getNode(ISD::TRUNCATE, DL,
13776 VT.changeVectorElementType(ResultVT), N);
13777 return true;
13778 }
13779 }
13780
13781 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13782 if (N.getOpcode() != ISD::SHL)
13783 return false;
13784
13785 SDValue N0 = N.getOperand(0);
13786 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13787 N0.getOpcode() != RISCVISD::VZEXT_VL)
13788 return false;
13789 if (!N0->hasOneUse())
13790 return false;
13791
13792 APInt ShAmt;
13793 SDValue N1 = N.getOperand(1);
13794 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13795 return false;
13796
13797 SDValue Src = N0.getOperand(0);
13798 EVT SrcVT = Src.getValueType();
13799 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13800 unsigned ShAmtV = ShAmt.getZExtValue();
13801 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13802 NewElen = std::max(NewElen, 8U);
13803
13804 // Skip if NewElen is not narrower than the original extended type.
13805 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13806 return false;
13807
13808 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13809 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13810
13811 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13812 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13813 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13814 return true;
13815}
13816
13817// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13818// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13819// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13820 // can become a sext.w instead of a shift pair.
13821 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13822 const RISCVSubtarget &Subtarget) {
13823 SDValue N0 = N->getOperand(0);
13824 SDValue N1 = N->getOperand(1);
13825 EVT VT = N->getValueType(0);
13826 EVT OpVT = N0.getValueType();
13827
13828 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13829 return SDValue();
13830
13831 // RHS needs to be a constant.
13832 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13833 if (!N1C)
13834 return SDValue();
13835
13836 // LHS needs to be (and X, 0xffffffff).
13837 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13838 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13839 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13840 return SDValue();
13841
13842 // Looking for an equality compare.
13843 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13844 if (!isIntEqualitySetCC(Cond))
13845 return SDValue();
13846
13847 // Don't do this if the sign bit is provably zero, it will be turned back into
13848 // an AND.
13849 APInt SignMask = APInt::getOneBitSet(64, 31);
13850 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13851 return SDValue();
13852
13853 const APInt &C1 = N1C->getAPIntValue();
13854
13855 SDLoc dl(N);
13856 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13857 // to be equal.
13858 if (C1.getActiveBits() > 32)
13859 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13860
13861 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13862 N0.getOperand(0), DAG.getValueType(MVT::i32));
13863 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13864 dl, OpVT), Cond);
13865}
13866
13867 static SDValue
13868 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13869 const RISCVSubtarget &Subtarget) {
13870 SDValue Src = N->getOperand(0);
13871 EVT VT = N->getValueType(0);
13872
13873 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13874 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
13875 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13876 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16) &&
13877 Subtarget.hasStdExtZfhmin())
13878 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13879 Src.getOperand(0));
13880
13881 return SDValue();
13882}
13883
13884namespace {
13885// Forward declaration of the structure holding the necessary information to
13886// apply a combine.
13887struct CombineResult;
13888
13889enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13890/// Helper class for folding sign/zero extensions.
13891/// In particular, this class is used for the following combines:
13892/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13893/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13894/// mul | mul_vl -> vwmul(u) | vwmul_su
13895/// shl | shl_vl -> vwsll
13896/// fadd -> vfwadd | vfwadd_w
13897/// fsub -> vfwsub | vfwsub_w
13898/// fmul -> vfwmul
13899/// An object of this class represents an operand of the operation we want to
13900/// combine.
13901/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13902/// NodeExtensionHelper for `a` and one for `b`.
13903///
13904/// This class abstracts away how the extension is materialized and
13905/// how its number of users affect the combines.
13906///
13907/// In particular:
13908/// - VWADD_W is conceptually == add(op0, sext(op1))
13909/// - VWADDU_W == add(op0, zext(op1))
13910/// - VWSUB_W == sub(op0, sext(op1))
13911/// - VWSUBU_W == sub(op0, zext(op1))
13912/// - VFWADD_W == fadd(op0, fpext(op1))
13913/// - VFWSUB_W == fsub(op0, fpext(op1))
13914/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13915/// zext|sext(smaller_value).
13916struct NodeExtensionHelper {
13917 /// Records if this operand is like being zero extended.
13918 bool SupportsZExt;
13919 /// Records if this operand is like being sign extended.
13920 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
12921 /// instance, a splat constant (e.g., 3) would support being both sign and
13922 /// zero extended.
13923 bool SupportsSExt;
12924 /// Records if this operand is like being floating-point extended.
13925 bool SupportsFPExt;
13926 /// This boolean captures whether we care if this operand would still be
13927 /// around after the folding happens.
13928 bool EnforceOneUse;
13929 /// Original value that this NodeExtensionHelper represents.
13930 SDValue OrigOperand;
13931
13932 /// Get the value feeding the extension or the value itself.
13933 /// E.g., for zext(a), this would return a.
13934 SDValue getSource() const {
13935 switch (OrigOperand.getOpcode()) {
13936 case ISD::ZERO_EXTEND:
13937 case ISD::SIGN_EXTEND:
13938 case RISCVISD::VSEXT_VL:
13939 case RISCVISD::VZEXT_VL:
13940 case RISCVISD::FP_EXTEND_VL:
13941 return OrigOperand.getOperand(0);
13942 default:
13943 return OrigOperand;
13944 }
13945 }
13946
13947 /// Check if this instance represents a splat.
13948 bool isSplat() const {
13949 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13950 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13951 }
13952
13953 /// Get the extended opcode.
13954 unsigned getExtOpc(ExtKind SupportsExt) const {
13955 switch (SupportsExt) {
13956 case ExtKind::SExt:
13957 return RISCVISD::VSEXT_VL;
13958 case ExtKind::ZExt:
13959 return RISCVISD::VZEXT_VL;
13960 case ExtKind::FPExt:
13961 return RISCVISD::FP_EXTEND_VL;
13962 }
13963 llvm_unreachable("Unknown ExtKind enum");
13964 }
13965
13966 /// Get or create a value that can feed \p Root with the given extension \p
13967 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
13968 /// operand. \see ::getSource().
13969 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13970 const RISCVSubtarget &Subtarget,
13971 std::optional<ExtKind> SupportsExt) const {
13972 if (!SupportsExt.has_value())
13973 return OrigOperand;
13974
13975 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13976
13977 SDValue Source = getSource();
13978 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13979 if (Source.getValueType() == NarrowVT)
13980 return Source;
13981
13982 unsigned ExtOpc = getExtOpc(*SupportsExt);
13983
13984 // If we need an extension, we should be changing the type.
13985 SDLoc DL(OrigOperand);
13986 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13987 switch (OrigOperand.getOpcode()) {
13988 case ISD::ZERO_EXTEND:
13989 case ISD::SIGN_EXTEND:
13990 case RISCVISD::VSEXT_VL:
13991 case RISCVISD::VZEXT_VL:
13992 case RISCVISD::FP_EXTEND_VL:
13993 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13994 case ISD::SPLAT_VECTOR:
13995 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
13996 case RISCVISD::VMV_V_X_VL:
13997 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13998 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13999 case RISCVISD::VFMV_V_F_VL:
14000 Source = Source.getOperand(1);
14001 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
14002 Source = Source.getOperand(0);
14003 assert(Source.getValueType() == NarrowVT.getVectorElementType());
14004 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
14005 DAG.getUNDEF(NarrowVT), Source, VL);
14006 default:
14007 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
14008 // and that operand should already have the right NarrowVT so no
14009 // extension should be required at this point.
14010 llvm_unreachable("Unsupported opcode");
14011 }
14012 }
14013
14014 /// Helper function to get the narrow type for \p Root.
14015 /// The narrow type is the type of \p Root where we divided the size of each
14016 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
14017 /// \pre Both the narrow type and the original type should be legal.
14018 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
14019 MVT VT = Root->getSimpleValueType(0);
14020
14021 // Determine the narrow size.
14022 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14023
14024 MVT EltVT = SupportsExt == ExtKind::FPExt
14025 ? MVT::getFloatingPointVT(NarrowSize)
14026 : MVT::getIntegerVT(NarrowSize);
14027
14028 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
14029 "Trying to extend something we can't represent");
14030 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
14031 return NarrowVT;
14032 }
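// [Editor's illustration, not part of the upstream file] A worked example of
// the halving rule implemented above: for a root of type <vscale x 2 x i32>,
// NarrowSize is 32 / 2 = 16, giving <vscale x 2 x i16> for integer extensions
// and <vscale x 2 x f16> for FP extensions. A minimal standalone model of the
// same computation (hypothetical helper name, plain integers only):
//
//   unsigned narrowEltBits(unsigned EltBits, bool IsFP) {
//     unsigned Narrow = EltBits / 2;            // halve the element width
//     assert(Narrow >= (IsFP ? 16u : 8u) &&     // f16 / i8 are the floors
//            "Trying to extend something we can't represent");
//     return Narrow;                            // e.g. (32, false) -> 16
//   }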
14033
14034 /// Get the opcode to materialize:
14035 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
14036 static unsigned getSExtOpcode(unsigned Opcode) {
14037 switch (Opcode) {
14038 case ISD::ADD:
14039 case RISCVISD::ADD_VL:
14040 case RISCVISD::VWADD_W_VL:
14041 case RISCVISD::VWADDU_W_VL:
14042 case ISD::OR:
14043 return RISCVISD::VWADD_VL;
14044 case ISD::SUB:
14045 case RISCVISD::SUB_VL:
14046 case RISCVISD::VWSUB_W_VL:
14047 case RISCVISD::VWSUBU_W_VL:
14048 return RISCVISD::VWSUB_VL;
14049 case ISD::MUL:
14050 case RISCVISD::MUL_VL:
14051 return RISCVISD::VWMUL_VL;
14052 default:
14053 llvm_unreachable("Unexpected opcode");
14054 }
14055 }
14056
14057 /// Get the opcode to materialize:
14058 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14059 static unsigned getZExtOpcode(unsigned Opcode) {
14060 switch (Opcode) {
14061 case ISD::ADD:
14062 case RISCVISD::ADD_VL:
14063 case RISCVISD::VWADD_W_VL:
14064 case RISCVISD::VWADDU_W_VL:
14065 case ISD::OR:
14066 return RISCVISD::VWADDU_VL;
14067 case ISD::SUB:
14068 case RISCVISD::SUB_VL:
14069 case RISCVISD::VWSUB_W_VL:
14070 case RISCVISD::VWSUBU_W_VL:
14071 return RISCVISD::VWSUBU_VL;
14072 case ISD::MUL:
14073 case RISCVISD::MUL_VL:
14074 return RISCVISD::VWMULU_VL;
14075 case ISD::SHL:
14076 case RISCVISD::SHL_VL:
14077 return RISCVISD::VWSLL_VL;
14078 default:
14079 llvm_unreachable("Unexpected opcode");
14080 }
14081 }
14082
14083 /// Get the opcode to materialize:
14084 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14085 static unsigned getFPExtOpcode(unsigned Opcode) {
14086 switch (Opcode) {
14087 case RISCVISD::FADD_VL:
14088 case RISCVISD::VFWADD_W_VL:
14089 return RISCVISD::VFWADD_VL;
14090 case RISCVISD::FSUB_VL:
14091 case RISCVISD::VFWSUB_W_VL:
14092 return RISCVISD::VFWSUB_VL;
14093 case RISCVISD::FMUL_VL:
14094 return RISCVISD::VFWMUL_VL;
14095 case RISCVISD::VFMADD_VL:
14096 return RISCVISD::VFWMADD_VL;
14097 case RISCVISD::VFMSUB_VL:
14098 return RISCVISD::VFWMSUB_VL;
14099 case RISCVISD::VFNMADD_VL:
14100 return RISCVISD::VFWNMADD_VL;
14101 case RISCVISD::VFNMSUB_VL:
14102 return RISCVISD::VFWNMSUB_VL;
14103 default:
14104 llvm_unreachable("Unexpected opcode");
14105 }
14106 }
14107
14108 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14109 /// newOpcode(a, b).
14110 static unsigned getSUOpcode(unsigned Opcode) {
14111 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14112 "SU is only supported for MUL");
14113 return RISCVISD::VWMULSU_VL;
14114 }
14115
14116 /// Get the opcode to materialize
14117 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
14118 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14119 switch (Opcode) {
14120 case ISD::ADD:
14121 case RISCVISD::ADD_VL:
14122 case ISD::OR:
14123 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
14124 : RISCVISD::VWADDU_W_VL;
14125 case ISD::SUB:
14126 case RISCVISD::SUB_VL:
14127 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
14128 : RISCVISD::VWSUBU_W_VL;
14129 case RISCVISD::FADD_VL:
14130 return RISCVISD::VFWADD_W_VL;
14131 case RISCVISD::FSUB_VL:
14132 return RISCVISD::VFWSUB_W_VL;
14133 default:
14134 llvm_unreachable("Unexpected opcode");
14135 }
14136 }
14137
14138 using CombineToTry = std::function<std::optional<CombineResult>(
14139 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14140 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14141 const RISCVSubtarget &)>;
14142
14143 /// Check if this node needs to be fully folded or extended for all users.
14144 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14145
14146 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14147 const RISCVSubtarget &Subtarget) {
14148 unsigned Opc = OrigOperand.getOpcode();
14149 MVT VT = OrigOperand.getSimpleValueType();
14150
14151 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14152 "Unexpected Opcode");
14153
14154 // The passthru must be undef for tail agnostic.
14155 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14156 return;
14157
14158 // Get the scalar value.
14159 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14160 : OrigOperand.getOperand(1);
14161
14162 // See if we have enough sign bits or zero bits in the scalar to use a
14163 // widening opcode by splatting to smaller element size.
14164 unsigned EltBits = VT.getScalarSizeInBits();
14165 unsigned ScalarBits = Op.getValueSizeInBits();
14166 // If we're not getting all bits from the element, we need special handling.
14167 if (ScalarBits < EltBits) {
14168 // This should only occur on RV32.
14169 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
14170 !Subtarget.is64Bit() && "Unexpected splat");
14171 // vmv.v.x sign extends narrow inputs.
14172 SupportsSExt = true;
14173
14174 // If the input is positive, then sign extend is also zero extend.
14175 if (DAG.SignBitIsZero(Op))
14176 SupportsZExt = true;
14177
14178 EnforceOneUse = false;
14179 return;
14180 }
14181
14182 unsigned NarrowSize = EltBits / 2;
14183 // If the narrow type cannot be expressed with a legal VMV,
14184 // this is not a valid candidate.
14185 if (NarrowSize < 8)
14186 return;
14187
14188 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14189 SupportsSExt = true;
14190
14191 if (DAG.MaskedValueIsZero(Op,
14192 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14193 SupportsZExt = true;
14194
14195 EnforceOneUse = false;
14196 }
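// [Editor's illustration, not part of the upstream file] How the two bit
// tests above classify a concrete splat with i32 elements (NarrowSize == 16):
// a splat of 3 fits in 16 bits both signed and unsigned, so it supports both
// sign and zero extension; a splat of -1 supports only sign extension; a
// splat of 0xFFFF supports only zero extension. A scalar model of the two
// checks, with hypothetical names:
//
//   bool sextOK(int64_t V)  { return V >= INT16_MIN && V <= INT16_MAX; }
//   bool zextOK(uint64_t V) { return (V >> 16) == 0; }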
14197
14198 /// Helper method to set the various fields of this struct based on the
14199 /// type of \p Root.
14200 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14201 const RISCVSubtarget &Subtarget) {
14202 SupportsZExt = false;
14203 SupportsSExt = false;
14204 SupportsFPExt = false;
14205 EnforceOneUse = true;
14206 unsigned Opc = OrigOperand.getOpcode();
14207 // For the nodes we handle below, we end up using their inputs directly: see
14208 // getSource(). However since they either don't have a passthru or we check
14209 // that their passthru is undef, we can safely ignore their mask and VL.
14210 switch (Opc) {
14211 case ISD::ZERO_EXTEND:
14212 case ISD::SIGN_EXTEND: {
14213 MVT VT = OrigOperand.getSimpleValueType();
14214 if (!VT.isVector())
14215 break;
14216
14217 SDValue NarrowElt = OrigOperand.getOperand(0);
14218 MVT NarrowVT = NarrowElt.getSimpleValueType();
14219 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14220 if (NarrowVT.getVectorElementType() == MVT::i1)
14221 break;
14222
14223 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14224 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14225 break;
14226 }
14227 case RISCVISD::VZEXT_VL:
14228 SupportsZExt = true;
14229 break;
14230 case RISCVISD::VSEXT_VL:
14231 SupportsSExt = true;
14232 break;
14233 case RISCVISD::FP_EXTEND_VL:
14234 SupportsFPExt = true;
14235 break;
14236 case ISD::SPLAT_VECTOR:
14237 case RISCVISD::VMV_V_X_VL:
14238 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14239 break;
14240 case RISCVISD::VFMV_V_F_VL: {
14241 MVT VT = OrigOperand.getSimpleValueType();
14242
14243 if (!OrigOperand.getOperand(0).isUndef())
14244 break;
14245
14246 SDValue Op = OrigOperand.getOperand(1);
14247 if (Op.getOpcode() != ISD::FP_EXTEND)
14248 break;
14249
14250 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14251 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
14252 if (NarrowSize != ScalarBits)
14253 break;
14254
14255 SupportsFPExt = true;
14256 break;
14257 }
14258 default:
14259 break;
14260 }
14261 }
14262
14263 /// Check if \p Root supports any extension folding combines.
14264 static bool isSupportedRoot(const SDNode *Root,
14265 const RISCVSubtarget &Subtarget) {
14266 switch (Root->getOpcode()) {
14267 case ISD::ADD:
14268 case ISD::SUB:
14269 case ISD::MUL: {
14270 return Root->getValueType(0).isScalableVector();
14271 }
14272 case ISD::OR: {
14273 return Root->getValueType(0).isScalableVector() &&
14274 Root->getFlags().hasDisjoint();
14275 }
14276 // Vector Widening Integer Add/Sub/Mul Instructions
14277 case RISCVISD::ADD_VL:
14278 case RISCVISD::MUL_VL:
14279 case RISCVISD::VWADD_W_VL:
14280 case RISCVISD::VWADDU_W_VL:
14281 case RISCVISD::SUB_VL:
14282 case RISCVISD::VWSUB_W_VL:
14283 case RISCVISD::VWSUBU_W_VL:
14284 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14285 case RISCVISD::FADD_VL:
14286 case RISCVISD::FSUB_VL:
14287 case RISCVISD::FMUL_VL:
14288 case RISCVISD::VFWADD_W_VL:
14289 case RISCVISD::VFWSUB_W_VL:
14290 return true;
14291 case ISD::SHL:
14292 return Root->getValueType(0).isScalableVector() &&
14293 Subtarget.hasStdExtZvbb();
14294 case RISCVISD::SHL_VL:
14295 return Subtarget.hasStdExtZvbb();
14296 case RISCVISD::VFMADD_VL:
14297 case RISCVISD::VFNMSUB_VL:
14298 case RISCVISD::VFNMADD_VL:
14299 case RISCVISD::VFMSUB_VL:
14300 return true;
14301 default:
14302 return false;
14303 }
14304 }
14305
14306 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14307 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14308 const RISCVSubtarget &Subtarget) {
14309 assert(isSupportedRoot(Root, Subtarget) &&
14310 "Trying to build an helper with an "
14311 "unsupported root");
14312 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14314 OrigOperand = Root->getOperand(OperandIdx);
14315
14316 unsigned Opc = Root->getOpcode();
14317 switch (Opc) {
14318 // We consider
14319 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14320 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14321 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14322 case RISCVISD::VWADD_W_VL:
14323 case RISCVISD::VWADDU_W_VL:
14324 case RISCVISD::VWSUB_W_VL:
14325 case RISCVISD::VWSUBU_W_VL:
14326 case RISCVISD::VFWADD_W_VL:
14327 case RISCVISD::VFWSUB_W_VL:
14328 if (OperandIdx == 1) {
14329 SupportsZExt =
14330 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14331 SupportsSExt =
14332 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14333 SupportsFPExt =
14334 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14335 // There's no existing extension here, so we don't have to worry about
14336 // making sure it gets removed.
14337 EnforceOneUse = false;
14338 break;
14339 }
14340 [[fallthrough]];
14341 default:
14342 fillUpExtensionSupport(Root, DAG, Subtarget);
14343 break;
14344 }
14345 }
14346
14347 /// Helper function to get the Mask and VL from \p Root.
14348 static std::pair<SDValue, SDValue>
14349 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14350 const RISCVSubtarget &Subtarget) {
14351 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14352 switch (Root->getOpcode()) {
14353 case ISD::ADD:
14354 case ISD::SUB:
14355 case ISD::MUL:
14356 case ISD::OR:
14357 case ISD::SHL: {
14358 SDLoc DL(Root);
14359 MVT VT = Root->getSimpleValueType(0);
14360 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14361 }
14362 default:
14363 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14364 }
14365 }
14366
14367 /// Helper function to check if \p N is commutative with respect to the
14368 /// foldings that are supported by this class.
14369 static bool isCommutative(const SDNode *N) {
14370 switch (N->getOpcode()) {
14371 case ISD::ADD:
14372 case ISD::MUL:
14373 case ISD::OR:
14374 case RISCVISD::ADD_VL:
14375 case RISCVISD::MUL_VL:
14376 case RISCVISD::VWADD_W_VL:
14377 case RISCVISD::VWADDU_W_VL:
14378 case RISCVISD::FADD_VL:
14379 case RISCVISD::FMUL_VL:
14380 case RISCVISD::VFWADD_W_VL:
14381 case RISCVISD::VFMADD_VL:
14382 case RISCVISD::VFNMSUB_VL:
14383 case RISCVISD::VFNMADD_VL:
14384 case RISCVISD::VFMSUB_VL:
14385 return true;
14386 case ISD::SUB:
14387 case RISCVISD::SUB_VL:
14388 case RISCVISD::VWSUB_W_VL:
14389 case RISCVISD::VWSUBU_W_VL:
14390 case RISCVISD::FSUB_VL:
14391 case RISCVISD::VFWSUB_W_VL:
14392 case ISD::SHL:
14393 case RISCVISD::SHL_VL:
14394 return false;
14395 default:
14396 llvm_unreachable("Unexpected opcode");
14397 }
14398 }
14399
14400 /// Get a list of combine to try for folding extensions in \p Root.
14401 /// Note that each returned CombineToTry function doesn't actually modify
14402 /// anything. Instead, they produce an optional CombineResult that, if not None,
14403 /// needs to be materialized for the combine to be applied.
14404 /// \see CombineResult::materialize.
14405 /// If the related CombineToTry function returns std::nullopt, that means the
14406 /// combine didn't match.
14407 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14408};
14409
14410/// Helper structure that holds all the necessary information to materialize a
14411/// combine that does some extension folding.
14412struct CombineResult {
14413 /// Opcode to be generated when materializing the combine.
14414 unsigned TargetOpcode;
14415 // No value means no extension is needed.
14416 std::optional<ExtKind> LHSExt;
14417 std::optional<ExtKind> RHSExt;
14418 /// Root of the combine.
14419 SDNode *Root;
14420 /// LHS of the TargetOpcode.
14421 NodeExtensionHelper LHS;
14422 /// RHS of the TargetOpcode.
14423 NodeExtensionHelper RHS;
14424
14425 CombineResult(unsigned TargetOpcode, SDNode *Root,
14426 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14427 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14428 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14429 LHS(LHS), RHS(RHS) {}
14430
14431 /// Return a value that uses TargetOpcode and that can be used to replace
14432 /// Root.
14433 /// The actual replacement is *not* done in that method.
14434 SDValue materialize(SelectionDAG &DAG,
14435 const RISCVSubtarget &Subtarget) const {
14436 SDValue Mask, VL, Passthru;
14437 std::tie(Mask, VL) =
14438 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14439 switch (Root->getOpcode()) {
14440 default:
14441 Passthru = Root->getOperand(2);
14442 break;
14443 case ISD::ADD:
14444 case ISD::SUB:
14445 case ISD::MUL:
14446 case ISD::OR:
14447 case ISD::SHL:
14448 Passthru = DAG.getUNDEF(Root->getValueType(0));
14449 break;
14450 }
14451 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14452 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14453 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14454 Passthru, Mask, VL);
14455 }
14456};
14457
14458/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14459/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14460/// are zext) and LHS and RHS can be folded into Root.
14461 /// AllowExtMask defines which forms `ext` can take in this pattern.
14462///
14463/// \note If the pattern can match with both zext and sext, the returned
14464/// CombineResult will feature the zext result.
14465///
14466/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14467/// can be used to apply the pattern.
14468static std::optional<CombineResult>
14469canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14470 const NodeExtensionHelper &RHS,
14471 uint8_t AllowExtMask, SelectionDAG &DAG,
14472 const RISCVSubtarget &Subtarget) {
14473 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14474 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14475 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14476 /*RHSExt=*/{ExtKind::ZExt});
14477 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14478 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14479 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14480 /*RHSExt=*/{ExtKind::SExt});
14481 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14482 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14483 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14484 /*RHSExt=*/{ExtKind::FPExt});
14485 return std::nullopt;
14486}
14487
14488/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14489/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14490/// are zext) and LHS and RHS can be folded into Root.
14491///
14492/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14493/// can be used to apply the pattern.
14494static std::optional<CombineResult>
14495canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14496 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14497 const RISCVSubtarget &Subtarget) {
14498 return canFoldToVWWithSameExtensionImpl(
14499 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14500 Subtarget);
14501}
14502
14503/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14504///
14505/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14506/// can be used to apply the pattern.
14507static std::optional<CombineResult>
14508canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14509 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14510 const RISCVSubtarget &Subtarget) {
14511 if (RHS.SupportsFPExt)
14512 return CombineResult(
14513 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14514 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14515
14516 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14517 // sext/zext?
14518 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14519 // purposes.
14520 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14521 return CombineResult(
14522 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14523 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14524 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14525 return CombineResult(
14526 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14527 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14528 return std::nullopt;
14529}
14530
14531/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14532///
14533/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14534/// can be used to apply the pattern.
14535static std::optional<CombineResult>
14536canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14537 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14538 const RISCVSubtarget &Subtarget) {
14539 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14540 Subtarget);
14541}
14542
14543/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14544///
14545/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14546/// can be used to apply the pattern.
14547static std::optional<CombineResult>
14548canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14549 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14550 const RISCVSubtarget &Subtarget) {
14551 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14552 Subtarget);
14553}
14554
14555/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14556///
14557/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14558/// can be used to apply the pattern.
14559static std::optional<CombineResult>
14560canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14561 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14562 const RISCVSubtarget &Subtarget) {
14563 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14564 Subtarget);
14565}
14566
14567/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14568///
14569/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14570/// can be used to apply the pattern.
14571static std::optional<CombineResult>
14572canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14573 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14574 const RISCVSubtarget &Subtarget) {
14575
14576 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14577 return std::nullopt;
14578 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14579 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14580 /*RHSExt=*/{ExtKind::ZExt});
14581}
14582
14583 SmallVector<NodeExtensionHelper::CombineToTry>
14584 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14585 SmallVector<CombineToTry> Strategies;
14586 switch (Root->getOpcode()) {
14587 case ISD::ADD:
14588 case ISD::SUB:
14589 case ISD::OR:
14590 case RISCVISD::ADD_VL:
14591 case RISCVISD::SUB_VL:
14592 case RISCVISD::FADD_VL:
14593 case RISCVISD::FSUB_VL:
14594 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14595 Strategies.push_back(canFoldToVWWithSameExtension);
14596 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
14597 Strategies.push_back(canFoldToVW_W);
14598 break;
14599 case RISCVISD::FMUL_VL:
14600 case RISCVISD::VFMADD_VL:
14601 case RISCVISD::VFMSUB_VL:
14602 case RISCVISD::VFNMADD_VL:
14603 case RISCVISD::VFNMSUB_VL:
14604 Strategies.push_back(canFoldToVWWithSameExtension);
14605 break;
14606 case ISD::MUL:
14607 case RISCVISD::MUL_VL:
14608 // mul -> vwmul(u)
14609 Strategies.push_back(canFoldToVWWithSameExtension);
14610 // mul -> vwmulsu
14611 Strategies.push_back(canFoldToVW_SU);
14612 break;
14613 case ISD::SHL:
14614 case RISCVISD::SHL_VL:
14615 // shl -> vwsll
14616 Strategies.push_back(canFoldToVWWithZEXT);
14617 break;
14618 case RISCVISD::VWADD_W_VL:
14619 case RISCVISD::VWSUB_W_VL:
14620 // vwadd_w|vwsub_w -> vwadd|vwsub
14621 Strategies.push_back(canFoldToVWWithSEXT);
14622 break;
14623 case RISCVISD::VWADDU_W_VL:
14624 case RISCVISD::VWSUBU_W_VL:
14625 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14626 Strategies.push_back(canFoldToVWWithZEXT);
14627 break;
14628 case RISCVISD::VFWADD_W_VL:
14629 case RISCVISD::VFWSUB_W_VL:
14630 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14631 Strategies.push_back(canFoldToVWWithFPEXT);
14632 break;
14633 default:
14634 llvm_unreachable("Unexpected opcode");
14635 }
14636 return Strategies;
14637}
14638} // End anonymous namespace.
14639
14640/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
14641/// The supported combines are:
14642/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14643/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14644/// mul | mul_vl -> vwmul(u) | vwmul_su
14645/// shl | shl_vl -> vwsll
14646/// fadd_vl -> vfwadd | vfwadd_w
14647/// fsub_vl -> vfwsub | vfwsub_w
14648/// fmul_vl -> vfwmul
14649/// vwadd_w(u) -> vwadd(u)
14650/// vwsub_w(u) -> vwsub(u)
14651/// vfwadd_w -> vfwadd
14652/// vfwsub_w -> vfwsub
14653 static SDValue combineOp_VLToVWOp_VL(SDNode *N,
14654 TargetLowering::DAGCombinerInfo &DCI,
14655 const RISCVSubtarget &Subtarget) {
14656 SelectionDAG &DAG = DCI.DAG;
14657 if (DCI.isBeforeLegalize())
14658 return SDValue();
14659
14660 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14661 return SDValue();
14662
14663 SmallVector<SDNode *> Worklist;
14664 SmallSet<SDNode *, 8> Inserted;
14665 Worklist.push_back(N);
14666 Inserted.insert(N);
14667 SmallVector<CombineResult> CombinesToApply;
14668
14669 while (!Worklist.empty()) {
14670 SDNode *Root = Worklist.pop_back_val();
14671
14672 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
14673 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
14674 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
14675 &Inserted](const NodeExtensionHelper &Op) {
14676 if (Op.needToPromoteOtherUsers()) {
14677 for (SDNode::use_iterator UI = Op.OrigOperand->use_begin(),
14678 UE = Op.OrigOperand->use_end();
14679 UI != UE; ++UI) {
14680 SDNode *TheUse = *UI;
14681 if (!NodeExtensionHelper::isSupportedRoot(TheUse, Subtarget))
14682 return false;
14683 // We only support the first 2 operands of FMA.
14684 if (UI.getOperandNo() >= 2)
14685 return false;
14686 if (Inserted.insert(TheUse).second)
14687 Worklist.push_back(TheUse);
14688 }
14689 }
14690 return true;
14691 };
14692
14693 // Control the compile time by limiting the number of nodes we look at in
14694 // total.
14695 if (Inserted.size() > ExtensionMaxWebSize)
14696 return SDValue();
14697
14698 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14699 NodeExtensionHelper::getSupportedFoldings(Root);
14700
14701 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14702 bool Matched = false;
14703 for (int Attempt = 0;
14704 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
14705 ++Attempt) {
14706
14707 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14708 FoldingStrategies) {
14709 std::optional<CombineResult> Res =
14710 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
14711 if (Res) {
14712 Matched = true;
14713 CombinesToApply.push_back(*Res);
14714 // All the inputs that are extended need to be folded, otherwise
14715 // we would be leaving the old input (since it may still be used),
14716 // and the new one.
14717 if (Res->LHSExt.has_value())
14718 if (!AppendUsersIfNeeded(LHS))
14719 return SDValue();
14720 if (Res->RHSExt.has_value())
14721 if (!AppendUsersIfNeeded(RHS))
14722 return SDValue();
14723 break;
14724 }
14725 }
14726 std::swap(LHS, RHS);
14727 }
14728 // Right now we do an all or nothing approach.
14729 if (!Matched)
14730 return SDValue();
14731 }
14732 // Store the value for the replacement of the input node separately.
14733 SDValue InputRootReplacement;
14734 // We do the RAUW after we materialize all the combines, because some replaced
14735 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14736 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14737 // yet-to-be-visited CombinesToApply roots.
14738 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14739 ValuesToReplace.reserve(CombinesToApply.size());
14740 for (CombineResult Res : CombinesToApply) {
14741 SDValue NewValue = Res.materialize(DAG, Subtarget);
14742 if (!InputRootReplacement) {
14743 assert(Res.Root == N &&
14744 "First element is expected to be the current node");
14745 InputRootReplacement = NewValue;
14746 } else {
14747 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14748 }
14749 }
14750 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14751 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14752 DCI.AddToWorklist(OldNewValues.second.getNode());
14753 }
14754 return InputRootReplacement;
14755}
14756
14757// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14758// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14759// y will be the Passthru and cond will be the Mask.
14760 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14761 unsigned Opc = N->getOpcode();
14762 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14763 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14764
14765 SDValue Y = N->getOperand(0);
14766 SDValue MergeOp = N->getOperand(1);
14767 unsigned MergeOpc = MergeOp.getOpcode();
14768
14769 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14770 return SDValue();
14771
14772 SDValue X = MergeOp->getOperand(1);
14773
14774 if (!MergeOp.hasOneUse())
14775 return SDValue();
14776
14777 // Passthru should be undef
14778 SDValue Passthru = N->getOperand(2);
14779 if (!Passthru.isUndef())
14780 return SDValue();
14781
14782 // Mask should be all ones
14783 SDValue Mask = N->getOperand(3);
14784 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14785 return SDValue();
14786
14787 // False value of MergeOp should be all zeros
14788 SDValue Z = MergeOp->getOperand(2);
14789
14790 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14791 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14792 Z = Z.getOperand(1);
14793
14794 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14795 return SDValue();
14796
14797 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14798 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14799 N->getFlags());
14800}
14801
14802 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14803 TargetLowering::DAGCombinerInfo &DCI,
14804 const RISCVSubtarget &Subtarget) {
14805 [[maybe_unused]] unsigned Opc = N->getOpcode();
14806 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14807 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14808
14809 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
14810 return V;
14811
14812 return combineVWADDSUBWSelect(N, DCI.DAG);
14813}
14814
14815// Helper function for performMemPairCombine.
14816// Try to combine the memory loads/stores LSNode1 and LSNode2
14817// into a single memory pair operation.
14818 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14819 LSBaseSDNode *LSNode2, SDValue BasePtr,
14820 uint64_t Imm) {
14821 SmallPtrSet<const SDNode *, 32> Visited;
14822 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14823
14824 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14825 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14826 return SDValue();
14827
14828 MachineFunction &MF = DAG.getMachineFunction();
14829 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14830
14831 // The new operation has twice the width.
14832 MVT XLenVT = Subtarget.getXLenVT();
14833 EVT MemVT = LSNode1->getMemoryVT();
14834 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14835 MachineMemOperand *MMO = LSNode1->getMemOperand();
14836 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14837 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14838
14839 if (LSNode1->getOpcode() == ISD::LOAD) {
14840 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14841 unsigned Opcode;
14842 if (MemVT == MVT::i32)
14843 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14844 else
14845 Opcode = RISCVISD::TH_LDD;
14846
14847 SDValue Res = DAG.getMemIntrinsicNode(
14848 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14849 {LSNode1->getChain(), BasePtr,
14850 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14851 NewMemVT, NewMMO);
14852
14853 SDValue Node1 =
14854 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14855 SDValue Node2 =
14856 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14857
14858 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14859 return Node1;
14860 } else {
14861 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14862
14863 SDValue Res = DAG.getMemIntrinsicNode(
14864 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14865 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14866 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14867 NewMemVT, NewMMO);
14868
14869 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14870 return Res;
14871 }
14872}
14873
14874// Try to combine two adjacent loads/stores to a single pair instruction from
14875// the XTHeadMemPair vendor extension.
14876 static SDValue performMemPairCombine(SDNode *N,
14877 TargetLowering::DAGCombinerInfo &DCI) {
14878 SelectionDAG &DAG = DCI.DAG;
14879 MachineFunction &MF = DAG.getMachineFunction();
14880 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14881
14882 // Target does not support load/store pair.
14883 if (!Subtarget.hasVendorXTHeadMemPair())
14884 return SDValue();
14885
14886 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14887 EVT MemVT = LSNode1->getMemoryVT();
14888 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14889
14890 // No volatile, indexed or atomic loads/stores.
14891 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14892 return SDValue();
14893
14894 // Function to get a base + constant representation from a memory value.
14895 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14896 if (Ptr->getOpcode() == ISD::ADD)
14897 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14898 return {Ptr->getOperand(0), C1->getZExtValue()};
14899 return {Ptr, 0};
14900 };
14901
14902 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14903
14904 SDValue Chain = N->getOperand(0);
14905 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14906 UI != UE; ++UI) {
14907 SDUse &Use = UI.getUse();
14908 if (Use.getUser() != N && Use.getResNo() == 0 &&
14909 Use.getUser()->getOpcode() == N->getOpcode()) {
14910 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14911
14912 // No volatile, indexed or atomic loads/stores.
14913 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14914 continue;
14915
14916 // Check if LSNode1 and LSNode2 have the same type and extension.
14917 if (LSNode1->getOpcode() == ISD::LOAD)
14918 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14919 cast<LoadSDNode>(LSNode1)->getExtensionType())
14920 continue;
14921
14922 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14923 continue;
14924
14925 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14926
14927 // Check if the base pointer is the same for both instructions.
14928 if (Base1 != Base2)
14929 continue;
14930
14931 // Check if the offsets match the XTHeadMemPair encoding constraints.
14932 bool Valid = false;
14933 if (MemVT == MVT::i32) {
14934 // Check for adjacent i32 values and a 2-bit index.
14935 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14936 Valid = true;
14937 } else if (MemVT == MVT::i64) {
14938 // Check for adjacent i64 values and a 2-bit index.
14939 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14940 Valid = true;
14941 }
14942
14943 if (!Valid)
14944 continue;
14945
14946 // Try to combine.
14947 if (SDValue Res =
14948 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14949 return Res;
14950 }
14951 }
14952
14953 return SDValue();
14954}
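// [Editor's illustration, not part of the upstream file] The offset check
// above in concrete numbers: for an i32 pair the two accesses must be
// adjacent (Offset2 == Offset1 + 4) and Offset1 must be a 2-bit index scaled
// by 8 (isShiftedUInt<2, 3>), i.e. one of {0, 8, 16, 24}; for i64 the stride
// is 8 and the scale is 16. A standalone predicate with a hypothetical name:
//
//   bool isValidI32PairOffset(uint64_t Off1, uint64_t Off2) {
//     return Off2 == Off1 + 4 && Off1 % 8 == 0 && Off1 / 8 < 4;
//   }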
14955
14956// Fold
14957// (fp_to_int (froundeven X)) -> fcvt X, rne
14958// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14959// (fp_to_int (ffloor X)) -> fcvt X, rdn
14960// (fp_to_int (fceil X)) -> fcvt X, rup
14961// (fp_to_int (fround X)) -> fcvt X, rmm
14962// (fp_to_int (frint X)) -> fcvt X
14963 static SDValue performFP_TO_INTCombine(SDNode *N,
14964 TargetLowering::DAGCombinerInfo &DCI,
14965 const RISCVSubtarget &Subtarget) {
14966 SelectionDAG &DAG = DCI.DAG;
14967 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14968 MVT XLenVT = Subtarget.getXLenVT();
14969
14970 SDValue Src = N->getOperand(0);
14971
14972 // Don't do this for strict-fp Src.
14973 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14974 return SDValue();
14975
14976 // Ensure the FP type is legal.
14977 if (!TLI.isTypeLegal(Src.getValueType()))
14978 return SDValue();
14979
14980 // Don't do this for f16 with Zfhmin and not Zfh.
14981 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14982 return SDValue();
14983
14984 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14985 // If the result is invalid, we didn't find a foldable instruction.
14986 if (FRM == RISCVFPRndMode::Invalid)
14987 return SDValue();
14988
14989 SDLoc DL(N);
14990 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14991 EVT VT = N->getValueType(0);
14992
14993 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14994 MVT SrcVT = Src.getSimpleValueType();
14995 MVT SrcContainerVT = SrcVT;
14996 MVT ContainerVT = VT.getSimpleVT();
14997 SDValue XVal = Src.getOperand(0);
14998
14999 // For widening and narrowing conversions we just combine it into a
15000 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
15001 // end up getting lowered to their appropriate pseudo instructions based on
15002 // their operand types
15003 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
15004 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
15005 return SDValue();
15006
15007 // Make fixed-length vectors scalable first
15008 if (SrcVT.isFixedLengthVector()) {
15009 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
15010 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
15011 ContainerVT =
15012 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
15013 }
15014
15015 auto [Mask, VL] =
15016 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
15017
15018 SDValue FpToInt;
15019 if (FRM == RISCVFPRndMode::RTZ) {
15020 // Use the dedicated trunc static rounding mode if we're truncating so we
15021 // don't need to generate calls to fsrmi/fsrm
15022 unsigned Opc =
15023 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
15024 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15025 } else if (FRM == RISCVFPRndMode::DYN) {
15026 unsigned Opc =
15027 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
15028 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15029 } else {
15030 unsigned Opc =
15031 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
15032 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
15033 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
15034 }
15035
15036 // If converted from fixed-length to scalable, convert back
15037 if (VT.isFixedLengthVector())
15038 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
15039
15040 return FpToInt;
15041 }
15042
15043 // Only handle XLen or i32 types. Other types narrower than XLen will
15044 // eventually be legalized to XLenVT.
15045 if (VT != MVT::i32 && VT != XLenVT)
15046 return SDValue();
15047
15048 unsigned Opc;
15049 if (VT == XLenVT)
15050 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15051 else
15052 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15053
15054 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
15055 DAG.getTargetConstant(FRM, DL, XLenVT));
15056 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
15057}
15058
15059// Fold
15060// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
15061// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
15062// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
15063// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
15064// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
15065// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
15066 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
15067 TargetLowering::DAGCombinerInfo &DCI,
15068 const RISCVSubtarget &Subtarget) {
15069 SelectionDAG &DAG = DCI.DAG;
15070 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15071 MVT XLenVT = Subtarget.getXLenVT();
15072
15073 // Only handle XLen types. Other types narrower than XLen will eventually be
15074 // legalized to XLenVT.
15075 EVT DstVT = N->getValueType(0);
15076 if (DstVT != XLenVT)
15077 return SDValue();
15078
15079 SDValue Src = N->getOperand(0);
15080
15081 // Don't do this for strict-fp Src.
15082 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15083 return SDValue();
15084
15085 // Ensure the FP type is also legal.
15086 if (!TLI.isTypeLegal(Src.getValueType()))
15087 return SDValue();
15088
15089 // Don't do this for f16 with Zfhmin and not Zfh.
15090 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15091 return SDValue();
15092
15093 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15094
15095 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15096 if (FRM == RISCVFPRndMode::Invalid)
15097 return SDValue();
15098
15099 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
15100
15101 unsigned Opc;
15102 if (SatVT == DstVT)
15103 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15104 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
15105 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15106 else
15107 return SDValue();
15108 // FIXME: Support other SatVTs by clamping before or after the conversion.
15109
15110 Src = Src.getOperand(0);
15111
15112 SDLoc DL(N);
15113 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
15114 DAG.getTargetConstant(FRM, DL, XLenVT));
15115
15116 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
15117 // extend.
15118 if (Opc == RISCVISD::FCVT_WU_RV64)
15119 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
15120
15121 // RISC-V FP-to-int conversions saturate to the destination register size, but
15122 // don't produce 0 for nan.
15123 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
15124 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
15125}
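// [Editor's illustration, not part of the upstream file] The net effect of
// the pattern built above, modeled on scalars: fcvt saturates out-of-range
// inputs to the destination range, and the (setuo Src, Src) select forces
// NaN to 0, which FP_TO_SINT_SAT/FP_TO_UINT_SAT require. Hypothetical model:
//
//   int64_t fpToSint64Sat(double X) {
//     if (X != X) return 0;                              // NaN -> 0
//     if (X >= 9223372036854775808.0) return INT64_MAX;  // >= 2^63
//     if (X < -9223372036854775808.0) return INT64_MIN;  // < -2^63
//     return (int64_t)X;                                 // in-range convert
//   }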
15126
15127// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
15128// smaller than XLenVT.
15129 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
15130 const RISCVSubtarget &Subtarget) {
15131 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
15132
15133 SDValue Src = N->getOperand(0);
15134 if (Src.getOpcode() != ISD::BSWAP)
15135 return SDValue();
15136
15137 EVT VT = N->getValueType(0);
15138 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
15139 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
15140 return SDValue();
15141
15142 SDLoc DL(N);
15143 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
15144}
15145
15146// Convert from one FMA opcode to another based on whether we are negating the
15147// multiply result and/or the accumulator.
15148// NOTE: Only supports RVV operations with VL.
15149static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
15150 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
15151 if (NegMul) {
15152 // clang-format off
15153 switch (Opcode) {
15154 default: llvm_unreachable("Unexpected opcode");
15155 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15156 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15157 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15158 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15159 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15160 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15161 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15162 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15163 }
15164 // clang-format on
15165 }
15166
15167 // Negating the accumulator changes ADD<->SUB.
15168 if (NegAcc) {
15169 // clang-format off
15170 switch (Opcode) {
15171 default: llvm_unreachable("Unexpected opcode");
15172 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15173 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15174 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15175 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15176 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15177 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15178 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15179 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15180 }
15181 // clang-format on
15182 }
15183
15184 return Opcode;
15185}
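// [Editor's illustration, not part of the upstream file] The identities
// behind the table above, written out on scalars with the usual semantics
// vfmadd = a*b + c, vfmsub = a*b - c, vfnmadd = -(a*b) - c and
// vfnmsub = -(a*b) + c:
//
//   double vfmadd(double a, double b, double c)  { return a * b + c; }
//   double vfnmsub(double a, double b, double c) { return -(a * b) + c; }
//   // Negating the multiply result of VFMADD yields VFNMSUB:
//   //   vfmadd(-a, b, c) == vfnmsub(a, b, c), e.g. both are -1 for (2, 3, 5).
//   // Negating the accumulator instead turns VFMADD into VFMSUB, and doing
//   // both gives VFNMADD, which is exactly the NegMul/NegAcc mapping above.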
15186
15187 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15188 // Fold FNEG_VL into FMA opcodes.
15189 // The first operand of strict-fp is chain.
15190 unsigned Offset = N->isTargetStrictFPOpcode();
15191 SDValue A = N->getOperand(0 + Offset);
15192 SDValue B = N->getOperand(1 + Offset);
15193 SDValue C = N->getOperand(2 + Offset);
15194 SDValue Mask = N->getOperand(3 + Offset);
15195 SDValue VL = N->getOperand(4 + Offset);
15196
15197 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15198 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15199 V.getOperand(2) == VL) {
15200 // Return the negated input.
15201 V = V.getOperand(0);
15202 return true;
15203 }
15204
15205 return false;
15206 };
15207
15208 bool NegA = invertIfNegative(A);
15209 bool NegB = invertIfNegative(B);
15210 bool NegC = invertIfNegative(C);
15211
15212 // If no operands are negated, we're done.
15213 if (!NegA && !NegB && !NegC)
15214 return SDValue();
15215
15216 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15217 if (N->isTargetStrictFPOpcode())
15218 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15219 {N->getOperand(0), A, B, C, Mask, VL});
15220 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15221 VL);
15222}
15223
15224 static SDValue performVFMADD_VLCombine(SDNode *N,
15225 TargetLowering::DAGCombinerInfo &DCI,
15226 const RISCVSubtarget &Subtarget) {
15227 SelectionDAG &DAG = DCI.DAG;
15228
15229 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15230 return V;
15231
15232 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
15233 !Subtarget.hasVInstructionsF16())
15234 return SDValue();
15235
15236 // FIXME: Ignore strict opcodes for now.
15237 if (N->isTargetStrictFPOpcode())
15238 return SDValue();
15239
15240 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
15241}
15242
15243 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15244 const RISCVSubtarget &Subtarget) {
15245 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15246
15247 EVT VT = N->getValueType(0);
15248
15249 if (VT != Subtarget.getXLenVT())
15250 return SDValue();
15251
15252 if (!isa<ConstantSDNode>(N->getOperand(1)))
15253 return SDValue();
15254 uint64_t ShAmt = N->getConstantOperandVal(1);
15255
15256 SDValue N0 = N->getOperand(0);
15257
15258 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
15259 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
15260 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
15261 unsigned ExtSize =
15262 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
15263 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
15264 N0.getOperand(0).hasOneUse() &&
15265 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15266 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15267 if (LShAmt < ExtSize) {
15268 unsigned Size = VT.getSizeInBits();
15269 SDLoc ShlDL(N0.getOperand(0));
15270 SDValue Shl =
15271 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
15272 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
15273 SDLoc DL(N);
15274 return DAG.getNode(ISD::SRA, DL, VT, Shl,
15275 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
15276 }
15277 }
15278 }
15279
15280 if (ShAmt > 32 || VT != MVT::i64)
15281 return SDValue();
15282
15283 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15284 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15285 //
15286 // Also try these folds where an add or sub is in the middle.
15287 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
15288 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
15289 SDValue Shl;
15290 ConstantSDNode *AddC = nullptr;
15291
15292 // We might have an ADD or SUB between the SRA and SHL.
15293 bool IsAdd = N0.getOpcode() == ISD::ADD;
15294 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15295 // Other operand needs to be a constant we can modify.
15296 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15297 if (!AddC)
15298 return SDValue();
15299
15300 // AddC needs to have at least 32 trailing zeros.
15301 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
15302 return SDValue();
15303
15304 // All users should be a shift by constant less than or equal to 32. This
15305 // ensures we'll do this optimization for each of them to produce an
15306 // add/sub+sext_inreg they can all share.
15307 for (SDNode *U : N0->uses()) {
15308 if (U->getOpcode() != ISD::SRA ||
15309 !isa<ConstantSDNode>(U->getOperand(1)) ||
15310 U->getConstantOperandVal(1) > 32)
15311 return SDValue();
15312 }
15313
15314 Shl = N0.getOperand(IsAdd ? 0 : 1);
15315 } else {
15316 // Not an ADD or SUB.
15317 Shl = N0;
15318 }
15319
15320 // Look for a shift left by 32.
15321 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15322 Shl.getConstantOperandVal(1) != 32)
15323 return SDValue();
15324
15325 // If we didn't look through an add/sub, then the shl should have one use.
15326 // If we did look through an add/sub, the sext_inreg we create is free so
15327 // we're only creating 2 new instructions. It's enough to only remove the
15328 // original sra+add/sub.
15329 if (!AddC && !Shl.hasOneUse())
15330 return SDValue();
15331
15332 SDLoc DL(N);
15333 SDValue In = Shl.getOperand(0);
15334
15335 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15336 // constant.
15337 if (AddC) {
15338 SDValue ShiftedAddC =
15339 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
15340 if (IsAdd)
15341 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15342 else
15343 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15344 }
15345
15346 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15347 DAG.getValueType(MVT::i32));
15348 if (ShAmt == 32)
15349 return SExt;
15350
15351 return DAG.getNode(
15352 ISD::SHL, DL, MVT::i64, SExt,
15353 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15354}
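// [Editor's illustration, not part of the upstream file] The scalar identity
// the fold above relies on, assuming an arithmetic right shift on int64_t
// (guaranteed since C++20 and the behavior of the usual compilers):
//
//   // (sra (shl X, 32), 32 - C)  ==  (shl (sext_inreg X, i32), C)
//   int64_t lhs(int64_t X, unsigned C) {
//     return (int64_t)((uint64_t)X << 32) >> (32 - C);
//   }
//   int64_t rhs(int64_t X, unsigned C) {
//     int64_t SExt = (int32_t)X;                 // sext_inreg X, i32
//     return (int64_t)((uint64_t)SExt << C);     // shl by C
//   }
//   // e.g. any X with low half 0x80000002 and C = 5 gives the same
//   // (negative) value on both sides.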
15355
15356// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
15357 // the result is used as the condition of a br_cc or select_cc we can invert,
15358// inverting the setcc is free, and Z is 0/1. Caller will invert the
15359// br_cc/select_cc.
15360 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15361 bool IsAnd = Cond.getOpcode() == ISD::AND;
15362 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15363 return SDValue();
15364
15365 if (!Cond.hasOneUse())
15366 return SDValue();
15367
15368 SDValue Setcc = Cond.getOperand(0);
15369 SDValue Xor = Cond.getOperand(1);
15370 // Canonicalize setcc to LHS.
15371 if (Setcc.getOpcode() != ISD::SETCC)
15372 std::swap(Setcc, Xor);
15373 // LHS should be a setcc and RHS should be an xor.
15374 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15375 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15376 return SDValue();
15377
15378 // If the condition is an And, SimplifyDemandedBits may have changed
15379 // (xor Z, 1) to (not Z).
15380 SDValue Xor1 = Xor.getOperand(1);
15381 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15382 return SDValue();
15383
15384 EVT VT = Cond.getValueType();
15385 SDValue Xor0 = Xor.getOperand(0);
15386
15387 // The LHS of the xor needs to be 0/1.
15388 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15389 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15390 return SDValue();
15391
15392 // We can only invert integer setccs.
15393 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15394 if (!SetCCOpVT.isScalarInteger())
15395 return SDValue();
15396
15397 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15398 if (ISD::isIntEqualitySetCC(CCVal)) {
15399 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15400 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15401 Setcc.getOperand(1), CCVal);
15402 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15403 // Invert (setlt 0, X) by converting to (setlt X, 1).
15404 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15405 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15406 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15407 // Invert (setlt X, 1) by converting to (setlt 0, X).
15408 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15409 DAG.getConstant(0, SDLoc(Setcc), VT),
15410 Setcc.getOperand(0), CCVal);
15411 } else
15412 return SDValue();
15413
15414 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15415 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15416}
15417
15418 // Perform common combines for BR_CC and SELECT_CC conditions.
15419static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15420 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15421 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15422
15423 // Since an arithmetic right shift always preserves the sign bit,
15424 // the shift can be omitted.
15425 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15426 // setge (sra X, N), 0 -> setge X, 0
15427 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15428 LHS.getOpcode() == ISD::SRA) {
15429 LHS = LHS.getOperand(0);
15430 return true;
15431 }
15432
15433 if (!ISD::isIntEqualitySetCC(CCVal))
15434 return false;
15435
15436 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15437 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15438 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15439 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15440 // If we're looking for eq 0 instead of ne 0, we need to invert the
15441 // condition.
15442 bool Invert = CCVal == ISD::SETEQ;
15443 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15444 if (Invert)
15445 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15446
15447 RHS = LHS.getOperand(1);
15448 LHS = LHS.getOperand(0);
15449 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15450
15451 CC = DAG.getCondCode(CCVal);
15452 return true;
15453 }
15454
15455 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15456 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15457 RHS = LHS.getOperand(1);
15458 LHS = LHS.getOperand(0);
15459 return true;
15460 }
15461
15462 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15463 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15464 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15465 SDValue LHS0 = LHS.getOperand(0);
15466 if (LHS0.getOpcode() == ISD::AND &&
15467 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15468 uint64_t Mask = LHS0.getConstantOperandVal(1);
15469 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15470 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15471 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15472 CC = DAG.getCondCode(CCVal);
15473
15474 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15475 LHS = LHS0.getOperand(0);
15476 if (ShAmt != 0)
15477 LHS =
15478 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15479 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15480 return true;
15481 }
15482 }
15483 }
15484
15485 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15486 // This can occur when legalizing some floating point comparisons.
15487 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15488 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15489 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15490 CC = DAG.getCondCode(CCVal);
15491 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15492 return true;
15493 }
15494
15495 if (isNullConstant(RHS)) {
15496 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15497 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15498 CC = DAG.getCondCode(CCVal);
15499 LHS = NewCond;
15500 return true;
15501 }
15502 }
15503
15504 return false;
15505}
15506
15507// Fold
15508// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15509// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15510// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15511// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
15512 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15513 SDValue TrueVal, SDValue FalseVal,
15514 bool Swapped) {
15515 bool Commutative = true;
15516 unsigned Opc = TrueVal.getOpcode();
15517 switch (Opc) {
15518 default:
15519 return SDValue();
15520 case ISD::SHL:
15521 case ISD::SRA:
15522 case ISD::SRL:
15523 case ISD::SUB:
15524 Commutative = false;
15525 break;
15526 case ISD::ADD:
15527 case ISD::OR:
15528 case ISD::XOR:
15529 break;
15530 }
15531
15532 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15533 return SDValue();
15534
15535 unsigned OpToFold;
15536 if (FalseVal == TrueVal.getOperand(0))
15537 OpToFold = 0;
15538 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15539 OpToFold = 1;
15540 else
15541 return SDValue();
15542
15543 EVT VT = N->getValueType(0);
15544 SDLoc DL(N);
15545 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15546 EVT OtherOpVT = OtherOp.getValueType();
15547 SDValue IdentityOperand =
15548 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15549 if (!Commutative)
15550 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15551 assert(IdentityOperand && "No identity operand!");
15552
15553 if (Swapped)
15554 std::swap(OtherOp, IdentityOperand);
15555 SDValue NewSel =
15556 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15557 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15558}
15559
15560// This tries to get rid of `select` and `icmp` that are being used to handle
15561 // targets that do not support `cttz(0)`/`ctlz(0)`.
15562 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15563 SDValue Cond = N->getOperand(0);
15564
15565 // This represents either CTTZ or CTLZ instruction.
15566 SDValue CountZeroes;
15567
15568 SDValue ValOnZero;
15569
15570 if (Cond.getOpcode() != ISD::SETCC)
15571 return SDValue();
15572
15573 if (!isNullConstant(Cond->getOperand(1)))
15574 return SDValue();
15575
15576 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15577 if (CCVal == ISD::CondCode::SETEQ) {
15578 CountZeroes = N->getOperand(2);
15579 ValOnZero = N->getOperand(1);
15580 } else if (CCVal == ISD::CondCode::SETNE) {
15581 CountZeroes = N->getOperand(1);
15582 ValOnZero = N->getOperand(2);
15583 } else {
15584 return SDValue();
15585 }
15586
15587 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15588 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15589 CountZeroes = CountZeroes.getOperand(0);
15590
15591 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15592 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15593 CountZeroes.getOpcode() != ISD::CTLZ &&
15594 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15595 return SDValue();
15596
15597 if (!isNullConstant(ValOnZero))
15598 return SDValue();
15599
15600 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15601 if (Cond->getOperand(0) != CountZeroesArgument)
15602 return SDValue();
15603
15604 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15605 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15606 CountZeroes.getValueType(), CountZeroesArgument);
15607 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15608 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15609 CountZeroes.getValueType(), CountZeroesArgument);
15610 }
15611
15612 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15613 SDValue BitWidthMinusOne =
15614 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15615
15616 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15617 CountZeroes, BitWidthMinusOne);
15618 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15619}
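// [Editor's illustration, not part of the upstream file] Why the mask with
// BitWidth - 1 can replace the select: with the Zbb semantics cttz(0) equals
// the bit width, so for 64-bit values
//
//   unsigned cttzZbb(uint64_t X) { return X ? __builtin_ctzll(X) : 64; }
//   // select (X == 0), 0, cttz(X)  ==  cttzZbb(X) & 63
//   //   X != 0: cttzZbb(X) < 64, so the AND is a no-op.
//   //   X == 0: cttzZbb(X) == 64, and 64 & 63 == 0, the value wanted on zero.
// (cttzZbb is a hypothetical name; __builtin_ctzll is the GCC/Clang builtin.)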
15620
15621 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15622 const RISCVSubtarget &Subtarget) {
15623 SDValue Cond = N->getOperand(0);
15624 SDValue True = N->getOperand(1);
15625 SDValue False = N->getOperand(2);
15626 SDLoc DL(N);
15627 EVT VT = N->getValueType(0);
15628 EVT CondVT = Cond.getValueType();
15629
15630 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15631 return SDValue();
15632
15633 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15634 // BEXTI, where C is power of 2.
15635 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15636 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15637 SDValue LHS = Cond.getOperand(0);
15638 SDValue RHS = Cond.getOperand(1);
15639 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15640 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15641 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15642 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15643 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15644 return DAG.getSelect(DL, VT,
15645 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15646 False, True);
15647 }
15648 }
15649 return SDValue();
15650}
15651
15652 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15653 const RISCVSubtarget &Subtarget) {
15654 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15655 return Folded;
15656
15657 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15658 return V;
15659
15660 if (Subtarget.hasConditionalMoveFusion())
15661 return SDValue();
15662
15663 SDValue TrueVal = N->getOperand(1);
15664 SDValue FalseVal = N->getOperand(2);
15665 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15666 return V;
15667 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15668}
15669
15670/// If we have a build_vector where each lane is binop X, C, where C
15671/// is a constant (but not necessarily the same constant on all lanes),
15672/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15673/// We assume that materializing a constant build vector will be no more
15674/// expensive than performing O(n) binops.
15675static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15676 const RISCVSubtarget &Subtarget,
15677 const RISCVTargetLowering &TLI) {
15678 SDLoc DL(N);
15679 EVT VT = N->getValueType(0);
15680
15681 assert(!VT.isScalableVector() && "unexpected build vector");
15682
15683 if (VT.getVectorNumElements() == 1)
15684 return SDValue();
15685
15686 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15687 if (!TLI.isBinOp(Opcode))
15688 return SDValue();
15689
15690 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15691 return SDValue();
15692
15693 // This BUILD_VECTOR involves an implicit truncation, and sinking
15694 // truncates through binops is non-trivial.
15695 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15696 return SDValue();
15697
15698 SmallVector<SDValue> LHSOps;
15699 SmallVector<SDValue> RHSOps;
15700 for (SDValue Op : N->ops()) {
15701 if (Op.isUndef()) {
15702 // We can't form a divide or remainder from undef.
15703 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15704 return SDValue();
15705
15706 LHSOps.push_back(Op);
15707 RHSOps.push_back(Op);
15708 continue;
15709 }
15710
15711 // TODO: We can handle operations which have a neutral rhs value
15712 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15713 // of profitability in a more explicit manner.
15714 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15715 return SDValue();
15716
15717 LHSOps.push_back(Op.getOperand(0));
15718 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15719 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15720 return SDValue();
15721 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15722 // have different LHS and RHS types.
15723 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15724 return SDValue();
15725
15726 RHSOps.push_back(Op.getOperand(1));
15727 }
15728
15729 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15730 DAG.getBuildVector(VT, DL, RHSOps));
15731}
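// Illustrative example of the combine above:
//   build_vector (add x0, 1), (add x1, 2), (add x2, 3), (add x3, 4)
//     --> add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4)
// trading O(n) scalar binops for a single vector binop plus one materialized
// constant vector.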
15732
15733static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15734 const RISCVSubtarget &Subtarget,
15735 const RISCVTargetLowering &TLI) {
15736 SDValue InVec = N->getOperand(0);
15737 SDValue InVal = N->getOperand(1);
15738 SDValue EltNo = N->getOperand(2);
15739 SDLoc DL(N);
15740
15741 EVT VT = InVec.getValueType();
15742 if (VT.isScalableVector())
15743 return SDValue();
15744
15745 if (!InVec.hasOneUse())
15746 return SDValue();
15747
15748 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15749 // move the insert_vector_elts into the arms of the binop. Note that
15750 // the new RHS must be a constant.
15751 const unsigned InVecOpcode = InVec->getOpcode();
15752 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15753 InVal.hasOneUse()) {
15754 SDValue InVecLHS = InVec->getOperand(0);
15755 SDValue InVecRHS = InVec->getOperand(1);
15756 SDValue InValLHS = InVal->getOperand(0);
15757 SDValue InValRHS = InVal->getOperand(1);
15758
15759 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15760 return SDValue();
15761 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15762 return SDValue();
15763 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15764 // have different LHS and RHS types.
15765 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15766 return SDValue();
15767 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15768 InVecLHS, InValLHS, EltNo);
15769 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15770 InVecRHS, InValRHS, EltNo);
15771 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15772 }
15773
15774 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15775 // move the insert_vector_elt to the source operand of the concat_vector.
15776 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15777 return SDValue();
15778
15779 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15780 if (!IndexC)
15781 return SDValue();
15782 unsigned Elt = IndexC->getZExtValue();
15783
15784 EVT ConcatVT = InVec.getOperand(0).getValueType();
15785 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15786 return SDValue();
15787 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15788 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15789
15790 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15791 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15792 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15793 ConcatOp, InVal, NewIdx);
15794
15795 SmallVector<SDValue> ConcatOps;
15796 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15797 ConcatOps[ConcatOpIdx] = ConcatOp;
15798 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15799}
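// Illustrative example of the concat_vectors case above, assuming a v8i32
// concat of two v4i32 sources A and B:
//   insert_vector_elt (concat_vectors A, B), val, 6
//     --> concat_vectors A, (insert_vector_elt B, val, 2)
// so only the sub-vector that actually contains lane 6 is rewritten.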
15800
15801// If we're concatenating a series of vector loads like
15802// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15803// Then we can turn this into a strided load by widening the vector elements
15804// vlse32 p, stride=n
15805static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15806 const RISCVSubtarget &Subtarget,
15807 const RISCVTargetLowering &TLI) {
15808 SDLoc DL(N);
15809 EVT VT = N->getValueType(0);
15810
15811 // Only perform this combine on legal MVTs.
15812 if (!TLI.isTypeLegal(VT))
15813 return SDValue();
15814
15815 // TODO: Potentially extend this to scalable vectors
15816 if (VT.isScalableVector())
15817 return SDValue();
15818
15819 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15820 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15821 !SDValue(BaseLd, 0).hasOneUse())
15822 return SDValue();
15823
15824 EVT BaseLdVT = BaseLd->getValueType(0);
15825
15826 // Go through the loads and check that they're strided
15827 SmallVector<LoadSDNode *> Lds;
15828 Lds.push_back(BaseLd);
15829 Align Align = BaseLd->getAlign();
15830 for (SDValue Op : N->ops().drop_front()) {
15831 auto *Ld = dyn_cast<LoadSDNode>(Op);
15832 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15833 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15834 Ld->getValueType(0) != BaseLdVT)
15835 return SDValue();
15836
15837 Lds.push_back(Ld);
15838
15839 // The common alignment is the most restrictive (smallest) of all the loads
15840 Align = std::min(Align, Ld->getAlign());
15841 }
15842
15843 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15844 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15845 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15846 // If the load ptrs can be decomposed into a common (Base + Index) with a
15847 // common constant stride, then return the constant stride.
15848 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15849 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15850 if (BIO1.equalBaseIndex(BIO2, DAG))
15851 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15852
15853 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15854 SDValue P1 = Ld1->getBasePtr();
15855 SDValue P2 = Ld2->getBasePtr();
15856 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15857 return {{P2.getOperand(1), false}};
15858 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15859 return {{P1.getOperand(1), true}};
15860
15861 return std::nullopt;
15862 };
15863
15864 // Get the distance between the first and second loads
15865 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15866 if (!BaseDiff)
15867 return SDValue();
15868
15869 // Check all the loads are the same distance apart
15870 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15871 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15872 return SDValue();
15873
15874 // TODO: At this point, we've successfully matched a generalized gather
15875 // load. Maybe we should emit that, and then move the specialized
15876 // matchers above and below into a DAG combine?
15877
15878 // Get the widened scalar type, e.g. v4i8 -> i64
15879 unsigned WideScalarBitWidth =
15880 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15881 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15882
15883 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
15884 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15885 if (!TLI.isTypeLegal(WideVecVT))
15886 return SDValue();
15887
15888 // Check that the operation is legal
15889 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15890 return SDValue();
15891
15892 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15893 SDValue Stride =
15894 std::holds_alternative<SDValue>(StrideVariant)
15895 ? std::get<SDValue>(StrideVariant)
15896 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
15897 Lds[0]->getOffset().getValueType());
15898 if (MustNegateStride)
15899 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15900
15901 SDValue AllOneMask =
15902 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15903 DAG.getConstant(1, DL, MVT::i1));
15904
15905 uint64_t MemSize;
15906 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15907 ConstStride && ConstStride->getSExtValue() >= 0)
15908 // total size = (elsize * n) + (stride - elsize) * (n-1)
15909 // = elsize + stride * (n-1)
15910 MemSize = WideScalarVT.getSizeInBits() +
15911 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15912 else
15913 // If Stride isn't constant, then we can't know how much it will load
15914 MemSize = MemoryLocation::UnknownSize;
15915
15916 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15917 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15918 Align);
15919
15920 SDValue StridedLoad = DAG.getStridedLoadVP(
15921 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
15922 AllOneMask,
15923 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
15924
15925 for (SDValue Ld : N->ops())
15926 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15927
15928 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15929}
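// Illustrative example (byte offsets assumed): concatenating four v4i8 loads
// from p, p+16, p+32 and p+48 treats each v4i8 group as one i32 lane and
// becomes a v4i32 strided VP load from p with stride 16, which is then
// bitcast back to the original v16i8 result type.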
15930
15931static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15932 const RISCVSubtarget &Subtarget) {
15933
15934 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15935
15936 if (N->getValueType(0).isFixedLengthVector())
15937 return SDValue();
15938
15939 SDValue Addend = N->getOperand(0);
15940 SDValue MulOp = N->getOperand(1);
15941
15942 if (N->getOpcode() == RISCVISD::ADD_VL) {
15943 SDValue AddPassthruOp = N->getOperand(2);
15944 if (!AddPassthruOp.isUndef())
15945 return SDValue();
15946 }
15947
15948 auto IsVWMulOpc = [](unsigned Opc) {
15949 switch (Opc) {
15950 case RISCVISD::VWMUL_VL:
15951 case RISCVISD::VWMULU_VL:
15952 case RISCVISD::VWMULSU_VL:
15953 return true;
15954 default:
15955 return false;
15956 }
15957 };
15958
15959 if (!IsVWMulOpc(MulOp.getOpcode()))
15960 std::swap(Addend, MulOp);
15961
15962 if (!IsVWMulOpc(MulOp.getOpcode()))
15963 return SDValue();
15964
15965 SDValue MulPassthruOp = MulOp.getOperand(2);
15966
15967 if (!MulPassthruOp.isUndef())
15968 return SDValue();
15969
15970 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15971 const RISCVSubtarget &Subtarget) {
15972 if (N->getOpcode() == ISD::ADD) {
15973 SDLoc DL(N);
15974 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15975 Subtarget);
15976 }
15977 return std::make_pair(N->getOperand(3), N->getOperand(4));
15978 }(N, DAG, Subtarget);
15979
15980 SDValue MulMask = MulOp.getOperand(3);
15981 SDValue MulVL = MulOp.getOperand(4);
15982
15983 if (AddMask != MulMask || AddVL != MulVL)
15984 return SDValue();
15985
15986 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15987 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15988 "Unexpected opcode after VWMACC_VL");
15989 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15990 "Unexpected opcode after VWMACC_VL!");
15991 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15992 "Unexpected opcode after VWMUL_VL!");
15993 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15994 "Unexpected opcode after VWMUL_VL!");
15995
15996 SDLoc DL(N);
15997 EVT VT = N->getValueType(0);
15998 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15999 AddVL};
16000 return DAG.getNode(Opc, DL, VT, Ops);
16001}
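// Illustrative example of the combine above:
//   add_vl acc, (vwmul_vl x, y, undef, mask, vl), undef, mask, vl
//     --> vwmacc_vl x, y, acc, mask, vl
// and likewise vwmulu_vl folds to vwmaccu_vl and vwmulsu_vl to vwmaccsu_vl,
// merging the widening multiply and the accumulate into one operation.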
16002
16003static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
16004 ISD::MemIndexType &IndexType,
16005 RISCVTargetLowering::DAGCombinerInfo &DCI) {
16006 if (!DCI.isBeforeLegalize())
16007 return false;
16008
16009 SelectionDAG &DAG = DCI.DAG;
16010 const MVT XLenVT =
16011 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
16012
16013 const EVT IndexVT = Index.getValueType();
16014
16015 // RISC-V indexed loads only support the "unsigned unscaled" addressing
16016 // mode, so anything else must be manually legalized.
16017 if (!isIndexTypeSigned(IndexType))
16018 return false;
16019
16020 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
16021 // Any index legalization should first promote to XLenVT, so we don't lose
16022 // bits when scaling. This may create an illegal index type so we let
16023 // LLVM's legalization take care of the splitting.
16024 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
16025 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
16026 IndexVT.changeVectorElementType(XLenVT), Index);
16027 }
16028 IndexType = ISD::UNSIGNED_SCALED;
16029 return true;
16030}
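// Illustrative example: on RV64 a gather whose index vector is v4i8 first
// has its indices sign-extended to v4i64 (XLenVT), and the index type is
// then treated as unsigned, matching the single indexed addressing form
// that the RVV indexed loads and stores provide.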
16031
16032/// Match the index vector of a scatter or gather node as the shuffle mask
16033/// which performs the rearrangement if possible. Will only match if
16034/// all lanes are touched, and thus replacing the scatter or gather with
16035/// a unit strided access and shuffle is legal.
16036static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
16037 SmallVector<int> &ShuffleMask) {
16038 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16039 return false;
16040 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
16041 return false;
16042
16043 const unsigned ElementSize = VT.getScalarStoreSize();
16044 const unsigned NumElems = VT.getVectorNumElements();
16045
16046 // Create the shuffle mask and check all bits active
16047 assert(ShuffleMask.empty());
16048 BitVector ActiveLanes(NumElems);
16049 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16050 // TODO: We've found an active bit of UB, and could be
16051 // more aggressive here if desired.
16052 if (Index->getOperand(i)->isUndef())
16053 return false;
16054 uint64_t C = Index->getConstantOperandVal(i);
16055 if (C % ElementSize != 0)
16056 return false;
16057 C = C / ElementSize;
16058 if (C >= NumElems)
16059 return false;
16060 ShuffleMask.push_back(C);
16061 ActiveLanes.set(C);
16062 }
16063 return ActiveLanes.all();
16064}
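// Illustrative example: a v4i32 gather with an all-ones mask and constant
// byte offsets <0, 12, 8, 4> touches every element of the 16-byte region
// exactly once, so it can be rewritten as a unit-strided load followed by
// the shuffle mask <0, 3, 2, 1>.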
16065
16066/// Match the index of a gather or scatter operation as an operation
16067/// with twice the element width and half the number of elements. This is
16068/// generally profitable (if legal) because these operations are linear
16069/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
16070/// come out ahead.
16071static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
16072 Align BaseAlign, const RISCVSubtarget &ST) {
16073 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16074 return false;
16076 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
16077 return false;
16077
16078 // Attempt a doubling. If we can use an element type 4x or 8x in
16079 // size, this will happen via multiple iterations of the transform.
16080 const unsigned NumElems = VT.getVectorNumElements();
16081 if (NumElems % 2 != 0)
16082 return false;
16083
16084 const unsigned ElementSize = VT.getScalarStoreSize();
16085 const unsigned WiderElementSize = ElementSize * 2;
16086 if (WiderElementSize > ST.getELen()/8)
16087 return false;
16088
16089 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16090 return false;
16091
16092 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16093 // TODO: We've found an active bit of UB, and could be
16094 // more aggressive here if desired.
16095 if (Index->getOperand(i)->isUndef())
16096 return false;
16097 // TODO: This offset check is too strict if we support fully
16098 // misaligned memory operations.
16099 uint64_t C = Index->getConstantOperandVal(i);
16100 if (i % 2 == 0) {
16101 if (C % WiderElementSize != 0)
16102 return false;
16103 continue;
16104 }
16105 uint64_t Last = Index->getConstantOperandVal(i-1);
16106 if (C != Last + ElementSize)
16107 return false;
16108 }
16109 return true;
16110}
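// Illustrative example: a v4i32 gather with constant byte offsets
// <0, 4, 16, 20> accesses two adjacent pairs, so (alignment permitting) it
// can be treated as a v2i64 gather with byte offsets <0, 16>, halving the
// number of indexed elements.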
16111
16112// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16113// This benefits cases where X and Y are both the same low-precision vector
16114// value type. Since the truncate would be lowered into
16115// n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16116// restriction, such pattern would be expanded into a series of "vsetvli"
16117// and "vnsrl" instructions later to reach this point.
16118static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
16119 SDValue Mask = N->getOperand(1);
16120 SDValue VL = N->getOperand(2);
16121
16122 bool IsVLMAX = isAllOnesConstant(VL) ||
16123 (isa<RegisterSDNode>(VL) &&
16124 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16125 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
16126 Mask.getOperand(0) != VL)
16127 return SDValue();
16128
16129 auto IsTruncNode = [&](SDValue V) {
16130 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16131 V.getOperand(1) == Mask && V.getOperand(2) == VL;
16132 };
16133
16134 SDValue Op = N->getOperand(0);
16135
16136 // We first need to find the innermost TRUNCATE_VECTOR_VL node
16137 // to distinguish this pattern.
16138 while (IsTruncNode(Op)) {
16139 if (!Op.hasOneUse())
16140 return SDValue();
16141 Op = Op.getOperand(0);
16142 }
16143
16144 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
16145 return SDValue();
16146
16147 SDValue N0 = Op.getOperand(0);
16148 SDValue N1 = Op.getOperand(1);
16149 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
16150 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
16151 return SDValue();
16152
16153 SDValue N00 = N0.getOperand(0);
16154 SDValue N10 = N1.getOperand(0);
16155 if (!N00.getValueType().isVector() ||
16156 N00.getValueType() != N10.getValueType() ||
16157 N->getValueType(0) != N10.getValueType())
16158 return SDValue();
16159
16160 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16161 SDValue SMin =
16162 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16163 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16164 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16165}
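// Illustrative example (element types assumed): for v4i8 values X and Y that
// were sign- and zero-extended to v4i32,
//   truncate_vector_vl (sra (sext X), (zext Y)) --> sra X, (smin Y, 7)
// clamping the shift amount preserves the result because shifting the i8
// value by 7 already replicates only the sign bit.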
16166
16167// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
16168// maximum value for the truncated type.
16169// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
16170// is the signed maximum value for the truncated type and C2 is the signed
16171// minimum value.
16172static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
16173 const RISCVSubtarget &Subtarget) {
16174 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
16175
16176 MVT VT = N->getSimpleValueType(0);
16177
16178 SDValue Mask = N->getOperand(1);
16179 SDValue VL = N->getOperand(2);
16180
16181 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
16182 APInt &SplatVal) {
16183 if (V.getOpcode() != Opc &&
16184 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
16185 V.getOperand(3) == Mask && V.getOperand(4) == VL))
16186 return SDValue();
16187
16188 SDValue Op = V.getOperand(1);
16189
16190 // Peek through conversion between fixed and scalable vectors.
16191 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
16192 isNullConstant(Op.getOperand(2)) &&
16193 Op.getOperand(1).getValueType().isFixedLengthVector() &&
16194 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16195 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
16196 isNullConstant(Op.getOperand(1).getOperand(1)))
16197 Op = Op.getOperand(1).getOperand(0);
16198
16199 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
16200 return V.getOperand(0);
16201
16202 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
16203 Op.getOperand(2) == VL) {
16204 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
16205 SplatVal =
16206 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
16207 return V.getOperand(0);
16208 }
16209 }
16210
16211 return SDValue();
16212 };
16213
16214 SDLoc DL(N);
16215
16216 auto DetectUSatPattern = [&](SDValue V) {
16217 APInt LoC, HiC;
16218
16219 // Simple case, V is a UMIN.
16220 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16221 if (HiC.isMask(VT.getScalarSizeInBits()))
16222 return UMinOp;
16223
16224 // If we have an SMAX that removes negative numbers first, then we can match
16225 // SMIN instead of UMIN.
16226 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16227 if (SDValue SMaxOp =
16228 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16229 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16230 return SMinOp;
16231
16232 // If we have an SMIN before an SMAX and the SMAX constant is less than or
16233 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16234 // first.
16235 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16236 if (SDValue SMinOp =
16237 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16238 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16239 HiC.uge(LoC))
16240 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
16241 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16242 Mask, VL);
16243
16244 return SDValue();
16245 };
16246
16247 auto DetectSSatPattern = [&](SDValue V) {
16248 unsigned NumDstBits = VT.getScalarSizeInBits();
16249 unsigned NumSrcBits = V.getScalarValueSizeInBits();
16250 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16251 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16252
16253 APInt HiC, LoC;
16254 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16255 if (SDValue SMaxOp =
16256 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16257 if (HiC == SignedMax && LoC == SignedMin)
16258 return SMaxOp;
16259
16260 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16261 if (SDValue SMinOp =
16262 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16263 if (HiC == SignedMax && LoC == SignedMin)
16264 return SMinOp;
16265
16266 return SDValue();
16267 };
16268
16269 SDValue Src = N->getOperand(0);
16270
16271 // Look through multiple layers of truncates.
16272 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16273 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
16274 Src.hasOneUse())
16275 Src = Src.getOperand(0);
16276
16277 SDValue Val;
16278 unsigned ClipOpc;
16279 if ((Val = DetectUSatPattern(Src)))
16280 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
16281 else if ((Val = DetectSSatPattern(Src)))
16282 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
16283 else
16284 return SDValue();
16285
16286 MVT ValVT = Val.getSimpleValueType();
16287
16288 do {
16289 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
16290 ValVT = ValVT.changeVectorElementType(ValEltVT);
16291 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
16292 } while (ValVT != VT);
16293
16294 return Val;
16295}
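// Illustrative example: truncating v4i16 to v4i8 after clamping,
//   trunc (umin X, 255)              --> a single vnclipu of X
//   trunc (smin (smax X, -128), 127) --> a single vnclip of X
// instead of emitting the min/max pair followed by a separate narrowing
// shift.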
16296
16297SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
16298 DAGCombinerInfo &DCI) const {
16299 SelectionDAG &DAG = DCI.DAG;
16300 const MVT XLenVT = Subtarget.getXLenVT();
16301 SDLoc DL(N);
16302
16303 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16304 // bits are demanded. N will be added to the Worklist if it was not deleted.
16305 // Caller should return SDValue(N, 0) if this returns true.
16306 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16307 SDValue Op = N->getOperand(OpNo);
16308 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16309 if (!SimplifyDemandedBits(Op, Mask, DCI))
16310 return false;
16311
16312 if (N->getOpcode() != ISD::DELETED_NODE)
16313 DCI.AddToWorklist(N);
16314 return true;
16315 };
16316
16317 switch (N->getOpcode()) {
16318 default:
16319 break;
16320 case RISCVISD::SplitF64: {
16321 SDValue Op0 = N->getOperand(0);
16322 // If the input to SplitF64 is just BuildPairF64 then the operation is
16323 // redundant. Instead, use BuildPairF64's operands directly.
16324 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16325 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16326
16327 if (Op0->isUndef()) {
16328 SDValue Lo = DAG.getUNDEF(MVT::i32);
16329 SDValue Hi = DAG.getUNDEF(MVT::i32);
16330 return DCI.CombineTo(N, Lo, Hi);
16331 }
16332
16333 // It's cheaper to materialise two 32-bit integers than to load a double
16334 // from the constant pool and transfer it to integer registers through the
16335 // stack.
16336 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16337 APInt V = C->getValueAPF().bitcastToAPInt();
16338 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16339 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16340 return DCI.CombineTo(N, Lo, Hi);
16341 }
16342
16343 // This is a target-specific version of a DAGCombine performed in
16344 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16345 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16346 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16347 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16348 !Op0.getNode()->hasOneUse())
16349 break;
16350 SDValue NewSplitF64 =
16351 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16352 Op0.getOperand(0));
16353 SDValue Lo = NewSplitF64.getValue(0);
16354 SDValue Hi = NewSplitF64.getValue(1);
16355 APInt SignBit = APInt::getSignMask(32);
16356 if (Op0.getOpcode() == ISD::FNEG) {
16357 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16358 DAG.getConstant(SignBit, DL, MVT::i32));
16359 return DCI.CombineTo(N, Lo, NewHi);
16360 }
16361 assert(Op0.getOpcode() == ISD::FABS);
16362 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16363 DAG.getConstant(~SignBit, DL, MVT::i32));
16364 return DCI.CombineTo(N, Lo, NewHi);
16365 }
16366 case RISCVISD::SLLW:
16367 case RISCVISD::SRAW:
16368 case RISCVISD::SRLW:
16369 case RISCVISD::RORW:
16370 case RISCVISD::ROLW: {
16371 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16372 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16373 SimplifyDemandedLowBitsHelper(1, 5))
16374 return SDValue(N, 0);
16375
16376 break;
16377 }
16378 case RISCVISD::CLZW:
16379 case RISCVISD::CTZW: {
16380 // Only the lower 32 bits of the first operand are read
16381 if (SimplifyDemandedLowBitsHelper(0, 32))
16382 return SDValue(N, 0);
16383 break;
16384 }
16385 case RISCVISD::FMV_W_X_RV64: {
16386 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16387 // conversion is unnecessary and can be replaced with the
16388 // FMV_X_ANYEXTW_RV64 operand.
16389 SDValue Op0 = N->getOperand(0);
16390 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
16391 return Op0.getOperand(0);
16392 break;
16393 }
16394 case RISCVISD::FMV_X_ANYEXTH:
16395 case RISCVISD::FMV_X_ANYEXTW_RV64: {
16396 SDLoc DL(N);
16397 SDValue Op0 = N->getOperand(0);
16398 MVT VT = N->getSimpleValueType(0);
16399 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16400 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16401 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16402 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16403 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16404 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16405 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16406 assert(Op0.getOperand(0).getValueType() == VT &&
16407 "Unexpected value type!");
16408 return Op0.getOperand(0);
16409 }
16410
16411 // This is a target-specific version of a DAGCombine performed in
16412 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16413 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16414 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16415 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16416 !Op0.getNode()->hasOneUse())
16417 break;
16418 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16419 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16420 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16421 if (Op0.getOpcode() == ISD::FNEG)
16422 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16423 DAG.getConstant(SignBit, DL, VT));
16424
16425 assert(Op0.getOpcode() == ISD::FABS);
16426 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16427 DAG.getConstant(~SignBit, DL, VT));
16428 }
16429 case ISD::ABS: {
16430 EVT VT = N->getValueType(0);
16431 SDValue N0 = N->getOperand(0);
16432 // abs (sext) -> zext (abs)
16433 // abs (zext) -> zext (handled elsewhere)
16434 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16435 SDValue Src = N0.getOperand(0);
16436 SDLoc DL(N);
16437 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16438 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16439 }
16440 break;
16441 }
16442 case ISD::ADD: {
16443 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16444 return V;
16445 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16446 return V;
16447 return performADDCombine(N, DCI, Subtarget);
16448 }
16449 case ISD::SUB: {
16450 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16451 return V;
16452 return performSUBCombine(N, DAG, Subtarget);
16453 }
16454 case ISD::AND:
16455 return performANDCombine(N, DCI, Subtarget);
16456 case ISD::OR: {
16457 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16458 return V;
16459 return performORCombine(N, DCI, Subtarget);
16460 }
16461 case ISD::XOR:
16462 return performXORCombine(N, DAG, Subtarget);
16463 case ISD::MUL:
16464 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16465 return V;
16466 return performMULCombine(N, DAG, DCI, Subtarget);
16467 case ISD::SDIV:
16468 case ISD::UDIV:
16469 case ISD::SREM:
16470 case ISD::UREM:
16471 if (SDValue V = combineBinOpOfZExt(N, DAG))
16472 return V;
16473 break;
16474 case ISD::FMUL: {
16475 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
16476 SDValue N0 = N->getOperand(0);
16477 SDValue N1 = N->getOperand(1);
16478 if (N0->getOpcode() != ISD::FCOPYSIGN)
16479 std::swap(N0, N1);
16480 if (N0->getOpcode() != ISD::FCOPYSIGN)
16481 return SDValue();
16482 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
16483 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
16484 return SDValue();
16485 EVT VT = N->getValueType(0);
16486 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
16487 return SDValue();
16488 SDValue Sign = N0->getOperand(1);
16489 if (Sign.getValueType() != VT)
16490 return SDValue();
16491 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
16492 }
16493 case ISD::FADD:
16494 case ISD::UMAX:
16495 case ISD::UMIN:
16496 case ISD::SMAX:
16497 case ISD::SMIN:
16498 case ISD::FMAXNUM:
16499 case ISD::FMINNUM: {
16500 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16501 return V;
16502 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16503 return V;
16504 return SDValue();
16505 }
16506 case ISD::SETCC:
16507 return performSETCCCombine(N, DAG, Subtarget);
16508 case ISD::SIGN_EXTEND_INREG:
16509 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16510 case ISD::ZERO_EXTEND:
16511 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16512 // type legalization. This is safe because fp_to_uint produces poison if
16513 // it overflows.
16514 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16515 SDValue Src = N->getOperand(0);
16516 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16517 isTypeLegal(Src.getOperand(0).getValueType()))
16518 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16519 Src.getOperand(0));
16520 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16521 isTypeLegal(Src.getOperand(1).getValueType())) {
16522 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16523 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16524 Src.getOperand(0), Src.getOperand(1));
16525 DCI.CombineTo(N, Res);
16526 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16527 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16528 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16529 }
16530 }
16531 return SDValue();
16532 case RISCVISD::TRUNCATE_VECTOR_VL:
16533 if (SDValue V = combineTruncOfSraSext(N, DAG))
16534 return V;
16535 return combineTruncToVnclip(N, DAG, Subtarget);
16536 case ISD::TRUNCATE:
16537 return performTRUNCATECombine(N, DAG, Subtarget);
16538 case ISD::SELECT:
16539 return performSELECTCombine(N, DAG, Subtarget);
16540 case RISCVISD::CZERO_EQZ:
16541 case RISCVISD::CZERO_NEZ: {
16542 SDValue Val = N->getOperand(0);
16543 SDValue Cond = N->getOperand(1);
16544
16545 unsigned Opc = N->getOpcode();
16546
16547 // czero_eqz x, x -> x
16548 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16549 return Val;
16550
16551 unsigned InvOpc =
16552 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16553
16554 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16555 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16556 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16557 SDValue NewCond = Cond.getOperand(0);
16558 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16559 if (DAG.MaskedValueIsZero(NewCond, Mask))
16560 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16561 }
16562 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16563 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16564 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16565 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16566 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16567 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16568 if (ISD::isIntEqualitySetCC(CCVal))
16569 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16570 N->getValueType(0), Val, Cond.getOperand(0));
16571 }
16572 return SDValue();
16573 }
16574 case RISCVISD::SELECT_CC: {
16575 // Transform
16576 SDValue LHS = N->getOperand(0);
16577 SDValue RHS = N->getOperand(1);
16578 SDValue CC = N->getOperand(2);
16579 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16580 SDValue TrueV = N->getOperand(3);
16581 SDValue FalseV = N->getOperand(4);
16582 SDLoc DL(N);
16583 EVT VT = N->getValueType(0);
16584
16585 // If the True and False values are the same, we don't need a select_cc.
16586 if (TrueV == FalseV)
16587 return TrueV;
16588
16589 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16590 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
16591 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16592 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16593 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16594 if (CCVal == ISD::CondCode::SETGE)
16595 std::swap(TrueV, FalseV);
16596
16597 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16598 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16599 // Only handle simm12; if the constant is not in this range, it can be
16600 // treated as a register operand instead.
16601 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16602 isInt<12>(TrueSImm - FalseSImm)) {
16603 SDValue SRA =
16604 DAG.getNode(ISD::SRA, DL, VT, LHS,
16605 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16606 SDValue AND =
16607 DAG.getNode(ISD::AND, DL, VT, SRA,
16608 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
16609 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16610 }
16611
16612 if (CCVal == ISD::CondCode::SETGE)
16613 std::swap(TrueV, FalseV);
16614 }
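// Worked example of the branchless lowering above (XLEN == 64): for
// (select (x < 0), 4, 7) the emitted sequence computes
//   ((x >> 63) & (4 - 7)) + 7
// which is (-1 & -3) + 7 == 4 when x is negative and (0 & -3) + 7 == 7
// otherwise, using only an arithmetic shift, an and, and an add.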
16615
16616 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16617 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16618 {LHS, RHS, CC, TrueV, FalseV});
16619
16620 if (!Subtarget.hasConditionalMoveFusion()) {
16621 // (select c, -1, y) -> -c | y
16622 if (isAllOnesConstant(TrueV)) {
16623 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16624 SDValue Neg = DAG.getNegative(C, DL, VT);
16625 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16626 }
16627 // (select c, y, -1) -> -!c | y
16628 if (isAllOnesConstant(FalseV)) {
16629 SDValue C =
16630 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16631 SDValue Neg = DAG.getNegative(C, DL, VT);
16632 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16633 }
16634
16635 // (select c, 0, y) -> -!c & y
16636 if (isNullConstant(TrueV)) {
16637 SDValue C =
16638 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16639 SDValue Neg = DAG.getNegative(C, DL, VT);
16640 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16641 }
16642 // (select c, y, 0) -> -c & y
16643 if (isNullConstant(FalseV)) {
16644 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16645 SDValue Neg = DAG.getNegative(C, DL, VT);
16646 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16647 }
16648 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16649 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16650 if (((isOneConstant(FalseV) && LHS == TrueV &&
16651 CCVal == ISD::CondCode::SETNE) ||
16652 (isOneConstant(TrueV) && LHS == FalseV &&
16653 CCVal == ISD::CondCode::SETEQ)) &&
16654 isNullConstant(RHS)) {
16655 // freeze it to be safe.
16656 LHS = DAG.getFreeze(LHS);
16657 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16658 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16659 }
16660 }
16661
16662 // If both true/false are an xor with 1, pull through the select.
16663 // This can occur after op legalization if both operands are setccs that
16664 // require an xor to invert.
16665 // FIXME: Generalize to other binary ops with identical operand?
16666 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16667 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16668 isOneConstant(TrueV.getOperand(1)) &&
16669 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16670 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16671 TrueV.getOperand(0), FalseV.getOperand(0));
16672 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16673 }
16674
16675 return SDValue();
16676 }
16677 case RISCVISD::BR_CC: {
16678 SDValue LHS = N->getOperand(1);
16679 SDValue RHS = N->getOperand(2);
16680 SDValue CC = N->getOperand(3);
16681 SDLoc DL(N);
16682
16683 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16684 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16685 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16686
16687 return SDValue();
16688 }
16689 case ISD::BITREVERSE:
16690 return performBITREVERSECombine(N, DAG, Subtarget);
16691 case ISD::FP_TO_SINT:
16692 case ISD::FP_TO_UINT:
16693 return performFP_TO_INTCombine(N, DCI, Subtarget);
16694 case ISD::FP_TO_SINT_SAT:
16695 case ISD::FP_TO_UINT_SAT:
16696 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16697 case ISD::FCOPYSIGN: {
16698 EVT VT = N->getValueType(0);
16699 if (!VT.isVector())
16700 break;
16701 // There is a form of VFSGNJ which injects the negated sign of its second
16702 // operand. Try and bubble any FNEG up after the extend/round to produce
16703 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
16704 // TRUNC=1.
16705 SDValue In2 = N->getOperand(1);
16706 // Avoid cases where the extend/round has multiple uses, as duplicating
16707 // those is typically more expensive than removing a fneg.
16708 if (!In2.hasOneUse())
16709 break;
16710 if (In2.getOpcode() != ISD::FP_EXTEND &&
16711 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16712 break;
16713 In2 = In2.getOperand(0);
16714 if (In2.getOpcode() != ISD::FNEG)
16715 break;
16716 SDLoc DL(N);
16717 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16718 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16719 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16720 }
16721 case ISD::MGATHER: {
16722 const auto *MGN = cast<MaskedGatherSDNode>(N);
16723 const EVT VT = N->getValueType(0);
16724 SDValue Index = MGN->getIndex();
16725 SDValue ScaleOp = MGN->getScale();
16726 ISD::MemIndexType IndexType = MGN->getIndexType();
16727 assert(!MGN->isIndexScaled() &&
16728 "Scaled gather/scatter should not be formed");
16729
16730 SDLoc DL(N);
16731 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16732 return DAG.getMaskedGather(
16733 N->getVTList(), MGN->getMemoryVT(), DL,
16734 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16735 MGN->getBasePtr(), Index, ScaleOp},
16736 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16737
16738 if (narrowIndex(Index, IndexType, DAG))
16739 return DAG.getMaskedGather(
16740 N->getVTList(), MGN->getMemoryVT(), DL,
16741 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16742 MGN->getBasePtr(), Index, ScaleOp},
16743 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16744
16745 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16746 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16747 // The sequence will be XLenVT, not the type of Index. Tell
16748 // isSimpleVIDSequence this so we avoid overflow.
16749 if (std::optional<VIDSequence> SimpleVID =
16750 isSimpleVIDSequence(Index, Subtarget.getXLen());
16751 SimpleVID && SimpleVID->StepDenominator == 1) {
16752 const int64_t StepNumerator = SimpleVID->StepNumerator;
16753 const int64_t Addend = SimpleVID->Addend;
16754
16755 // Note: We don't need to check alignment here since (by assumption
16756 // from the existence of the gather), our offsets must be sufficiently
16757 // aligned.
16758
16759 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16760 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16761 assert(IndexType == ISD::UNSIGNED_SCALED);
16762 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16763 DAG.getSignedConstant(Addend, DL, PtrVT));
16764
16765 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
16766 VT.getVectorElementCount());
16767 SDValue StridedLoad = DAG.getStridedLoadVP(
16768 VT, DL, MGN->getChain(), BasePtr,
16769 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
16770 EVL, MGN->getMemOperand());
16771 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
16772 StridedLoad, MGN->getPassThru(), EVL);
16773 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
16774 DL);
16775 }
16776 }
16777
16778 SmallVector<int> ShuffleMask;
16779 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16780 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16781 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16782 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16783 MGN->getMask(), DAG.getUNDEF(VT),
16784 MGN->getMemoryVT(), MGN->getMemOperand(),
16785 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16786 SDValue Shuffle =
16787 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16788 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16789 }
16790
16791 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16792 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16793 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16794 SmallVector<SDValue> NewIndices;
16795 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16796 NewIndices.push_back(Index.getOperand(i));
16797 EVT IndexVT = Index.getValueType()
16798 .getHalfNumVectorElementsVT(*DAG.getContext());
16799 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16800
16801 unsigned ElementSize = VT.getScalarStoreSize();
16802 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16803 auto EltCnt = VT.getVectorElementCount();
16804 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16805 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16806 EltCnt.divideCoefficientBy(2));
16807 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16808 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16809 EltCnt.divideCoefficientBy(2));
16810 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16811
16812 SDValue Gather =
16813 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16814 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16815 Index, ScaleOp},
16816 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16817 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16818 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16819 }
16820 break;
16821 }
16822 case ISD::MSCATTER:{
16823 const auto *MSN = cast<MaskedScatterSDNode>(N);
16824 SDValue Index = MSN->getIndex();
16825 SDValue ScaleOp = MSN->getScale();
16826 ISD::MemIndexType IndexType = MSN->getIndexType();
16827 assert(!MSN->isIndexScaled() &&
16828 "Scaled gather/scatter should not be formed");
16829
16830 SDLoc DL(N);
16831 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16832 return DAG.getMaskedScatter(
16833 N->getVTList(), MSN->getMemoryVT(), DL,
16834 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16835 Index, ScaleOp},
16836 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16837
16838 if (narrowIndex(Index, IndexType, DAG))
16839 return DAG.getMaskedScatter(
16840 N->getVTList(), MSN->getMemoryVT(), DL,
16841 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16842 Index, ScaleOp},
16843 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16844
16845 EVT VT = MSN->getValue()->getValueType(0);
16846 SmallVector<int> ShuffleMask;
16847 if (!MSN->isTruncatingStore() &&
16848 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16849 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16850 DAG.getUNDEF(VT), ShuffleMask);
16851 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16852 DAG.getUNDEF(XLenVT), MSN->getMask(),
16853 MSN->getMemoryVT(), MSN->getMemOperand(),
16854 ISD::UNINDEXED, false);
16855 }
16856 break;
16857 }
16858 case ISD::VP_GATHER: {
16859 const auto *VPGN = cast<VPGatherSDNode>(N);
16860 SDValue Index = VPGN->getIndex();
16861 SDValue ScaleOp = VPGN->getScale();
16862 ISD::MemIndexType IndexType = VPGN->getIndexType();
16863 assert(!VPGN->isIndexScaled() &&
16864 "Scaled gather/scatter should not be formed");
16865
16866 SDLoc DL(N);
16867 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16868 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16869 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16870 ScaleOp, VPGN->getMask(),
16871 VPGN->getVectorLength()},
16872 VPGN->getMemOperand(), IndexType);
16873
16874 if (narrowIndex(Index, IndexType, DAG))
16875 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16876 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16877 ScaleOp, VPGN->getMask(),
16878 VPGN->getVectorLength()},
16879 VPGN->getMemOperand(), IndexType);
16880
16881 break;
16882 }
16883 case ISD::VP_SCATTER: {
16884 const auto *VPSN = cast<VPScatterSDNode>(N);
16885 SDValue Index = VPSN->getIndex();
16886 SDValue ScaleOp = VPSN->getScale();
16887 ISD::MemIndexType IndexType = VPSN->getIndexType();
16888 assert(!VPSN->isIndexScaled() &&
16889 "Scaled gather/scatter should not be formed");
16890
16891 SDLoc DL(N);
16892 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16893 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16894 {VPSN->getChain(), VPSN->getValue(),
16895 VPSN->getBasePtr(), Index, ScaleOp,
16896 VPSN->getMask(), VPSN->getVectorLength()},
16897 VPSN->getMemOperand(), IndexType);
16898
16899 if (narrowIndex(Index, IndexType, DAG))
16900 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16901 {VPSN->getChain(), VPSN->getValue(),
16902 VPSN->getBasePtr(), Index, ScaleOp,
16903 VPSN->getMask(), VPSN->getVectorLength()},
16904 VPSN->getMemOperand(), IndexType);
16905 break;
16906 }
16907 case RISCVISD::SHL_VL:
16908 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16909 return V;
16910 [[fallthrough]];
16911 case RISCVISD::SRA_VL:
16912 case RISCVISD::SRL_VL: {
16913 SDValue ShAmt = N->getOperand(1);
16914 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16915 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16916 SDLoc DL(N);
16917 SDValue VL = N->getOperand(4);
16918 EVT VT = N->getValueType(0);
16919 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16920 ShAmt.getOperand(1), VL);
16921 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16922 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16923 }
16924 break;
16925 }
16926 case ISD::SRA:
16927 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16928 return V;
16929 [[fallthrough]];
16930 case ISD::SRL:
16931 case ISD::SHL: {
16932 if (N->getOpcode() == ISD::SHL) {
16933 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16934 return V;
16935 }
16936 SDValue ShAmt = N->getOperand(1);
16937 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16938 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16939 SDLoc DL(N);
16940 EVT VT = N->getValueType(0);
16941 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16942 ShAmt.getOperand(1),
16943 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16944 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16945 }
16946 break;
16947 }
16948 case RISCVISD::ADD_VL:
16949 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16950 return V;
16951 return combineToVWMACC(N, DAG, Subtarget);
16952 case RISCVISD::VWADD_W_VL:
16953 case RISCVISD::VWADDU_W_VL:
16954 case RISCVISD::VWSUB_W_VL:
16955 case RISCVISD::VWSUBU_W_VL:
16956 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16957 case RISCVISD::SUB_VL:
16958 case RISCVISD::MUL_VL:
16959 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16960 case RISCVISD::VFMADD_VL:
16961 case RISCVISD::VFNMADD_VL:
16962 case RISCVISD::VFMSUB_VL:
16963 case RISCVISD::VFNMSUB_VL:
16964 case RISCVISD::STRICT_VFMADD_VL:
16965 case RISCVISD::STRICT_VFNMADD_VL:
16966 case RISCVISD::STRICT_VFMSUB_VL:
16967 case RISCVISD::STRICT_VFNMSUB_VL:
16968 return performVFMADD_VLCombine(N, DCI, Subtarget);
16969 case RISCVISD::FADD_VL:
16970 case RISCVISD::FSUB_VL:
16971 case RISCVISD::FMUL_VL:
16972 case RISCVISD::VFWADD_W_VL:
16973 case RISCVISD::VFWSUB_W_VL: {
16974 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
16975 !Subtarget.hasVInstructionsF16())
16976 return SDValue();
16977 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16978 }
16979 case ISD::LOAD:
16980 case ISD::STORE: {
16981 if (DCI.isAfterLegalizeDAG())
16982 if (SDValue V = performMemPairCombine(N, DCI))
16983 return V;
16984
16985 if (N->getOpcode() != ISD::STORE)
16986 break;
16987
16988 auto *Store = cast<StoreSDNode>(N);
16989 SDValue Chain = Store->getChain();
16990 EVT MemVT = Store->getMemoryVT();
16991 SDValue Val = Store->getValue();
16992 SDLoc DL(N);
16993
16994 bool IsScalarizable =
16995 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16996 Store->isSimple() &&
16997 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16998 isPowerOf2_64(MemVT.getSizeInBits()) &&
16999 MemVT.getSizeInBits() <= Subtarget.getXLen();
17000
17001 // If sufficiently aligned we can scalarize stores of constant vectors of
17002 // any power-of-two size up to XLen bits, provided that they aren't too
17003 // expensive to materialize.
17004 // vsetivli zero, 2, e8, m1, ta, ma
17005 // vmv.v.i v8, 4
17006 // vse64.v v8, (a0)
17007 // ->
17008 // li a1, 1028
17009 // sh a1, 0(a0)
17010 if (DCI.isBeforeLegalize() && IsScalarizable &&
17011 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
17012 // Get the constant vector bits
17013 APInt NewC(Val.getValueSizeInBits(), 0);
17014 uint64_t EltSize = Val.getScalarValueSizeInBits();
17015 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
17016 if (Val.getOperand(i).isUndef())
17017 continue;
17018 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
17019 i * EltSize);
17020 }
17021 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17022
17023 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
17024 true) <= 2 &&
17025 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17026 NewVT, *Store->getMemOperand())) {
17027 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
17028 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
17029 Store->getPointerInfo(), Store->getOriginalAlign(),
17030 Store->getMemOperand()->getFlags());
17031 }
17032 }
17033
17034 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
17035 // vsetivli zero, 2, e16, m1, ta, ma
17036 // vle16.v v8, (a0)
17037 // vse16.v v8, (a1)
17038 if (auto *L = dyn_cast<LoadSDNode>(Val);
17039 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
17040 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
17041 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
17042 L->getMemoryVT() == MemVT) {
17043 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17044 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17045 NewVT, *Store->getMemOperand()) &&
17046 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17047 NewVT, *L->getMemOperand())) {
17048 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
17049 L->getPointerInfo(), L->getOriginalAlign(),
17050 L->getMemOperand()->getFlags());
17051 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
17052 Store->getPointerInfo(), Store->getOriginalAlign(),
17053 Store->getMemOperand()->getFlags());
17054 }
17055 }
17056
17057 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
17058 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
17059 // any illegal types.
17060 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
17061 (DCI.isAfterLegalizeDAG() &&
17062 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17063 isNullConstant(Val.getOperand(1)))) {
17064 SDValue Src = Val.getOperand(0);
17065 MVT VecVT = Src.getSimpleValueType();
17066 // VecVT should be scalable and memory VT should match the element type.
17067 if (!Store->isIndexed() && VecVT.isScalableVector() &&
17068 MemVT == VecVT.getVectorElementType()) {
17069 SDLoc DL(N);
17070 MVT MaskVT = getMaskTypeFor(VecVT);
17071 return DAG.getStoreVP(
17072 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
17073 DAG.getConstant(1, DL, MaskVT),
17074 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
17075 Store->getMemOperand(), Store->getAddressingMode(),
17076 Store->isTruncatingStore(), /*IsCompress*/ false);
17077 }
17078 }
17079
17080 break;
17081 }
17082 case ISD::SPLAT_VECTOR: {
17083 EVT VT = N->getValueType(0);
17084 // Only perform this combine on legal MVT types.
17085 if (!isTypeLegal(VT))
17086 break;
17087 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
17088 DAG, Subtarget))
17089 return Gather;
17090 break;
17091 }
17092 case ISD::BUILD_VECTOR:
17093 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
17094 return V;
17095 break;
17096 case ISD::CONCAT_VECTORS:
17097 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
17098 return V;
17099 break;
17100 case ISD::INSERT_VECTOR_ELT:
17101 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
17102 return V;
17103 break;
17104 case RISCVISD::VFMV_V_F_VL: {
17105 const MVT VT = N->getSimpleValueType(0);
17106 SDValue Passthru = N->getOperand(0);
17107 SDValue Scalar = N->getOperand(1);
17108 SDValue VL = N->getOperand(2);
17109
17110 // If VL is 1, we can use vfmv.s.f.
17111 if (isOneConstant(VL))
17112 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
17113 break;
17114 }
17115 case RISCVISD::VMV_V_X_VL: {
17116 const MVT VT = N->getSimpleValueType(0);
17117 SDValue Passthru = N->getOperand(0);
17118 SDValue Scalar = N->getOperand(1);
17119 SDValue VL = N->getOperand(2);
17120
17121 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
17122 // scalar input.
17123 unsigned ScalarSize = Scalar.getValueSizeInBits();
17124 unsigned EltWidth = VT.getScalarSizeInBits();
17125 if (ScalarSize > EltWidth && Passthru.isUndef())
17126 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
17127 return SDValue(N, 0);
17128
17129 // If VL is 1 and the scalar value won't benefit from immediate, we can
17130 // use vmv.s.x.
17131 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17132 if (isOneConstant(VL) &&
17133 (!Const || Const->isZero() ||
17134 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
17135 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
17136
17137 break;
17138 }
17139 case RISCVISD::VFMV_S_F_VL: {
17140 SDValue Src = N->getOperand(1);
17141 // Try to remove vector->scalar->vector if the scalar->vector is inserting
17142 // into an undef vector.
17143 // TODO: Could use a vslide or vmv.v.v for non-undef.
17144 if (N->getOperand(0).isUndef() &&
17145 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17146 isNullConstant(Src.getOperand(1)) &&
17147 Src.getOperand(0).getValueType().isScalableVector()) {
17148 EVT VT = N->getValueType(0);
17149 EVT SrcVT = Src.getOperand(0).getValueType();
17151 // Widths match, just return the original vector.
17152 if (SrcVT == VT)
17153 return Src.getOperand(0);
17154 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
17155 }
17156 [[fallthrough]];
17157 }
17158 case RISCVISD::VMV_S_X_VL: {
17159 const MVT VT = N->getSimpleValueType(0);
17160 SDValue Passthru = N->getOperand(0);
17161 SDValue Scalar = N->getOperand(1);
17162 SDValue VL = N->getOperand(2);
17163
17164 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
17165 Scalar.getOperand(0).getValueType() == N->getValueType(0))
17166 return Scalar.getOperand(0);
17167
17168 // Use M1 or smaller to avoid over constraining register allocation
17169 const MVT M1VT = getLMUL1VT(VT);
17170 if (M1VT.bitsLT(VT)) {
17171 SDValue M1Passthru =
17172 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
17173 DAG.getVectorIdxConstant(0, DL));
17174 SDValue Result =
17175 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
17176 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
17177 DAG.getVectorIdxConstant(0, DL));
17178 return Result;
17179 }
17180
17181 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
17182 // higher would involve overly constraining the register allocator for
17183 // no purpose.
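 // For example (illustrative): a vmv.s.x of the constant 3 with an undef
 // passthru at LMUL <= 1 becomes the splat form below, which isel can match
 // as "vmv.v.i vd, 3" without materialising the scalar; zero (which can use
 // x0 directly) and constants outside simm5 stay as vmv.s.x.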
17184 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17185 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17186 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17187 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17188
17189 break;
17190 }
17191 case RISCVISD::VMV_X_S: {
17192 SDValue Vec = N->getOperand(0);
17193 MVT VecVT = N->getOperand(0).getSimpleValueType();
17194 const MVT M1VT = getLMUL1VT(VecVT);
17195 if (M1VT.bitsLT(VecVT)) {
17196 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17197 DAG.getVectorIdxConstant(0, DL));
17198 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17199 }
17200 break;
17201 }
17202 case ISD::INTRINSIC_VOID:
17203 case ISD::INTRINSIC_W_CHAIN:
17204 case ISD::INTRINSIC_WO_CHAIN: {
17205 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17206 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17207 switch (IntNo) {
17208 // By default we do not combine any intrinsic.
17209 default:
17210 return SDValue();
17211 case Intrinsic::riscv_vcpop:
17212 case Intrinsic::riscv_vcpop_mask:
17213 case Intrinsic::riscv_vfirst:
17214 case Intrinsic::riscv_vfirst_mask: {
17215 SDValue VL = N->getOperand(2);
17216 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17217 IntNo == Intrinsic::riscv_vfirst_mask)
17218 VL = N->getOperand(3);
17219 if (!isNullConstant(VL))
17220 return SDValue();
17221 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17222 SDLoc DL(N);
17223 EVT VT = N->getValueType(0);
17224 if (IntNo == Intrinsic::riscv_vfirst ||
17225 IntNo == Intrinsic::riscv_vfirst_mask)
17226 return DAG.getAllOnesConstant(DL, VT);
17227 return DAG.getConstant(0, DL, VT);
17228 }
17229 }
17230 }
17231 case ISD::BITCAST: {
17233 SDValue N0 = N->getOperand(0);
17234 EVT VT = N->getValueType(0);
17235 EVT SrcVT = N0.getValueType();
17236 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17237 // type, widen both sides to avoid a trip through memory.
17238 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17239 VT.isScalarInteger()) {
17240 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17241 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17242 Ops[0] = N0;
17243 SDLoc DL(N);
17244 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17245 N0 = DAG.getBitcast(MVT::i8, N0);
17246 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17247 }
17248
17249 return SDValue();
17250 }
17251 }
17252
17253 return SDValue();
17254}
17255
17256bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
17257 EVT XVT, unsigned KeptBits) const {
17258 // For vectors, we don't have a preference.
17259 if (XVT.isVector())
17260 return false;
17261
17262 if (XVT != MVT::i32 && XVT != MVT::i64)
17263 return false;
17264
17265 // We can use sext.w for RV64 or an srai 31 on RV32.
17266 if (KeptBits == 32 || KeptBits == 64)
17267 return true;
17268
17269 // With Zbb we can use sext.h/sext.b.
17270 return Subtarget.hasStdExtZbb() &&
17271 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17272 KeptBits == 16);
17273}
17274
17275bool RISCVTargetLowering::isDesirableToCommuteWithShift(
17276 const SDNode *N, CombineLevel Level) const {
17277 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17278 N->getOpcode() == ISD::SRL) &&
17279 "Expected shift op");
17280
17281 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17282 // materialised in fewer instructions than `(OP _, c1)`:
17283 //
17284 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17285 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
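 //
 // For example (illustrative): with (shl (add x, 2047), 4), c1 = 2047 is a
 // legal add immediate but c1 << c2 = 32752 is not, so the fold is rejected
 // below; with (shl (add x, 3), 4), c1 << c2 = 48 is still a legal add
 // immediate, so the fold is allowed to enable further combines.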
17286 SDValue N0 = N->getOperand(0);
17287 EVT Ty = N0.getValueType();
17288 if (Ty.isScalarInteger() &&
17289 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17290 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17291 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17292 if (C1 && C2) {
17293 const APInt &C1Int = C1->getAPIntValue();
17294 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17295
17296 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17297 // and the combine should happen, to potentially allow further combines
17298 // later.
17299 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17300 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17301 return true;
17302
17303 // We can materialise `c1` in an add immediate, so it's "free", and the
17304 // combine should be prevented.
17305 if (C1Int.getSignificantBits() <= 64 &&
17306 isLegalAddImmediate(C1Int.getSExtValue()))
17307 return false;
17308
17309 // Neither constant will fit into an immediate, so find materialisation
17310 // costs.
17311 int C1Cost =
17312 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17313 /*CompressionCost*/ true);
17314 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17315 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17316 /*CompressionCost*/ true);
17317
17318 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17319 // combine should be prevented.
17320 if (C1Cost < ShiftedC1Cost)
17321 return false;
17322 }
17323 }
17324 return true;
17325}
17326
17327bool RISCVTargetLowering::targetShrinkDemandedConstant(
17328 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17329 TargetLoweringOpt &TLO) const {
17330 // Delay this optimization as late as possible.
17331 if (!TLO.LegalOps)
17332 return false;
17333
17334 EVT VT = Op.getValueType();
17335 if (VT.isVector())
17336 return false;
17337
17338 unsigned Opcode = Op.getOpcode();
17339 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17340 return false;
17341
17342 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17343 if (!C)
17344 return false;
17345
17346 const APInt &Mask = C->getAPIntValue();
17347
17348 // Clear all non-demanded bits initially.
17349 APInt ShrunkMask = Mask & DemandedBits;
17350
17351 // Try to make a smaller immediate by setting undemanded bits.
17352
17353 APInt ExpandedMask = Mask | ~DemandedBits;
17354
17355 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17356 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17357 };
17358 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17359 if (NewMask == Mask)
17360 return true;
17361 SDLoc DL(Op);
17362 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17363 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17364 Op.getOperand(0), NewC);
17365 return TLO.CombineTo(Op, NewOp);
17366 };
17367
17368 // If the shrunk mask fits in sign extended 12 bits, let the target
17369 // independent code apply it.
17370 if (ShrunkMask.isSignedIntN(12))
17371 return false;
17372
17373 // AND has a few special cases for zext.
17374 if (Opcode == ISD::AND) {
17375 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17376 // otherwise use SLLI + SRLI.
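 // For example (illustrative): (and X, 0x7FFFF) with only the low 16 bits
 // demanded shrinks to 0xFFFF, which is not a simm12 but is accepted here
 // and selects to zext.h (or slli+srli without Zbb).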
17377 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17378 if (IsLegalMask(NewMask))
17379 return UseMask(NewMask);
17380
17381 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17382 if (VT == MVT::i64) {
17383 APInt NewMask = APInt(64, 0xffffffff);
17384 if (IsLegalMask(NewMask))
17385 return UseMask(NewMask);
17386 }
17387 }
17388
17389 // For the remaining optimizations, we need to be able to make a negative
17390 // number through a combination of mask and undemanded bits.
17391 if (!ExpandedMask.isNegative())
17392 return false;
17393
17394 // Determine the fewest number of bits needed to represent the negative number.
17395 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17396
17397 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17398 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17399 // If we can't create a simm12, we shouldn't change opaque constants.
17400 APInt NewMask = ShrunkMask;
17401 if (MinSignedBits <= 12)
17402 NewMask.setBitsFrom(11);
17403 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17404 NewMask.setBitsFrom(31);
17405 else
17406 return false;
17407
17408 // Check that our new mask is a subset of the demanded mask.
17409 assert(IsLegalMask(NewMask));
17410 return UseMask(NewMask);
17411}
17412
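// Illustrative values for the helper below (assuming a 64-bit input):
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80    - brev8 reverses
//   the bits within each byte.
//   computeGREVOrGORC(0x0100, 7, /*IsGORC=*/true) == 0xFF00 - orc.b sets every
//   bit of a byte when any bit of that byte is set.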
17413static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17414 static const uint64_t GREVMasks[] = {
17415 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17416 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17417
17418 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17419 unsigned Shift = 1 << Stage;
17420 if (ShAmt & Shift) {
17421 uint64_t Mask = GREVMasks[Stage];
17422 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17423 if (IsGORC)
17424 Res |= x;
17425 x = Res;
17426 }
17427 }
17428
17429 return x;
17430}
17431
17432void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17433 KnownBits &Known,
17434 const APInt &DemandedElts,
17435 const SelectionDAG &DAG,
17436 unsigned Depth) const {
17437 unsigned BitWidth = Known.getBitWidth();
17438 unsigned Opc = Op.getOpcode();
17439 assert((Opc >= ISD::BUILTIN_OP_END ||
17440 Opc == ISD::INTRINSIC_WO_CHAIN ||
17441 Opc == ISD::INTRINSIC_W_CHAIN ||
17442 Opc == ISD::INTRINSIC_VOID) &&
17443 "Should use MaskedValueIsZero if you don't know whether Op"
17444 " is a target node!");
17445
17446 Known.resetAll();
17447 switch (Opc) {
17448 default: break;
17449 case RISCVISD::SELECT_CC: {
17450 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17451 // If we don't know any bits, early out.
17452 if (Known.isUnknown())
17453 break;
17454 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17455
17456 // Only known if known in both the LHS and RHS.
17457 Known = Known.intersectWith(Known2);
17458 break;
17459 }
17460 case RISCVISD::CZERO_EQZ:
17461 case RISCVISD::CZERO_NEZ:
17462 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17463 // Result is either all zero or operand 0. We can propagate zeros, but not
17464 // ones.
17465 Known.One.clearAllBits();
17466 break;
17467 case RISCVISD::REMUW: {
17468 KnownBits Known2;
17469 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17470 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17471 // We only care about the lower 32 bits.
17472 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17473 // Restore the original width by sign extending.
17474 Known = Known.sext(BitWidth);
17475 break;
17476 }
17477 case RISCVISD::DIVUW: {
17478 KnownBits Known2;
17479 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17480 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17481 // We only care about the lower 32 bits.
17482 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17483 // Restore the original width by sign extending.
17484 Known = Known.sext(BitWidth);
17485 break;
17486 }
17487 case RISCVISD::SLLW: {
17488 KnownBits Known2;
17489 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17490 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17491 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17492 // Restore the original width by sign extending.
17493 Known = Known.sext(BitWidth);
17494 break;
17495 }
17496 case RISCVISD::CTZW: {
17497 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17498 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17499 unsigned LowBits = llvm::bit_width(PossibleTZ);
17500 Known.Zero.setBitsFrom(LowBits);
17501 break;
17502 }
17503 case RISCVISD::CLZW: {
17504 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17505 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17506 unsigned LowBits = llvm::bit_width(PossibleLZ);
17507 Known.Zero.setBitsFrom(LowBits);
17508 break;
17509 }
17510 case RISCVISD::BREV8:
17511 case RISCVISD::ORC_B: {
17512 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17513 // control value of 7 is equivalent to brev8 and orc.b.
17514 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17515 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17516 // To compute zeros, we need to invert the value and invert it back after.
17517 Known.Zero =
17518 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17519 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17520 break;
17521 }
17522 case RISCVISD::READ_VLENB: {
17523 // We can use the minimum and maximum VLEN values to bound VLENB. We
17524 // know VLEN must be a power of two.
17525 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17526 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17527 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17528 Known.Zero.setLowBits(Log2_32(MinVLenB));
17529 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17530 if (MaxVLenB == MinVLenB)
17531 Known.One.setBit(Log2_32(MinVLenB));
17532 break;
17533 }
17534 case RISCVISD::FCLASS: {
17535 // fclass will only set one of the low 10 bits.
17536 Known.Zero.setBitsFrom(10);
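 // (Per the RISC-V F/D extensions, these bits classify the input as -inf,
 // negative normal/subnormal, -0, +0, positive subnormal/normal, +inf,
 // signaling NaN or quiet NaN.)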
17537 break;
17538 }
17539 case ISD::INTRINSIC_W_CHAIN:
17540 case ISD::INTRINSIC_WO_CHAIN: {
17541 unsigned IntNo =
17542 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17543 switch (IntNo) {
17544 default:
17545 // We can't do anything for most intrinsics.
17546 break;
17547 case Intrinsic::riscv_vsetvli:
17548 case Intrinsic::riscv_vsetvlimax: {
17549 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17550 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17551 RISCVII::VLMUL VLMUL =
17552 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17553 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17554 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17555 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17556 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
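 // For example (illustrative): with a maximum VLEN of 512, SEW=32 and
 // LMUL=2, MaxVL is (512 / 32) * 2 = 32, so bits 6 and above of the result
 // are known to be zero below.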
17557
17558 // The result of vsetvli must not be larger than AVL.
17559 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17560 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17561
17562 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17563 if (BitWidth > KnownZeroFirstBit)
17564 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17565 break;
17566 }
17567 }
17568 break;
17569 }
17570 }
17571}
17572
17573unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17574 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17575 unsigned Depth) const {
17576 switch (Op.getOpcode()) {
17577 default:
17578 break;
17579 case RISCVISD::SELECT_CC: {
17580 unsigned Tmp =
17581 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17582 if (Tmp == 1) return 1; // Early out.
17583 unsigned Tmp2 =
17584 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17585 return std::min(Tmp, Tmp2);
17586 }
17587 case RISCVISD::CZERO_EQZ:
17588 case RISCVISD::CZERO_NEZ:
17589 // Output is either all zero or operand 0. We can propagate sign bit count
17590 // from operand 0.
17591 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17592 case RISCVISD::ABSW: {
17593 // We expand this at isel to negw+max. The result will have 33 sign bits
17594 // if the input has at least 33 sign bits.
17595 unsigned Tmp =
17596 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17597 if (Tmp < 33) return 1;
17598 return 33;
17599 }
17600 case RISCVISD::SLLW:
17601 case RISCVISD::SRAW:
17602 case RISCVISD::SRLW:
17603 case RISCVISD::DIVW:
17604 case RISCVISD::DIVUW:
17605 case RISCVISD::REMUW:
17606 case RISCVISD::ROLW:
17607 case RISCVISD::RORW:
17608 case RISCVISD::FCVT_W_RV64:
17609 case RISCVISD::FCVT_WU_RV64:
17610 case RISCVISD::STRICT_FCVT_W_RV64:
17611 case RISCVISD::STRICT_FCVT_WU_RV64:
17612 // TODO: As the result is sign-extended, this is conservatively correct. A
17613 // more precise answer could be calculated for SRAW depending on known
17614 // bits in the shift amount.
17615 return 33;
17616 case RISCVISD::VMV_X_S: {
17617 // The number of sign bits of the scalar result is computed by obtaining the
17618 // element type of the input vector operand, subtracting its width from the
17619 // XLEN, and then adding one (sign bit within the element type). If the
17620 // element type is wider than XLen, the least-significant XLEN bits are
17621 // taken.
17622 unsigned XLen = Subtarget.getXLen();
17623 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17624 if (EltBits <= XLen)
17625 return XLen - EltBits + 1;
17626 break;
17627 }
17628 case ISD::INTRINSIC_W_CHAIN: {
17629 unsigned IntNo = Op.getConstantOperandVal(1);
17630 switch (IntNo) {
17631 default:
17632 break;
17633 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17634 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17635 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17636 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17637 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17638 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17639 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17640 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17641 case Intrinsic::riscv_masked_cmpxchg_i64:
17642 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17643 // narrow atomic operation. These are implemented using atomic
17644 // operations at the minimum supported atomicrmw/cmpxchg width whose
17645 // result is then sign extended to XLEN. With +A, the minimum width is
17646 // 32 for both RV64 and RV32.
17647 assert(Subtarget.getXLen() == 64);
17649 assert(Subtarget.hasStdExtA());
17650 return 33;
17651 }
17652 break;
17653 }
17654 }
17655
17656 return 1;
17657}
17658
17659bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17660 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17661 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17662
17663 // TODO: Add more target nodes.
17664 switch (Op.getOpcode()) {
17665 case RISCVISD::SELECT_CC:
17666 // Integer select_cc cannot create poison.
17667 // TODO: What are the FP poison semantics?
17668 // TODO: This instruction blocks poison from the unselected operand, can
17669 // we do anything with that?
17670 return !Op.getValueType().isInteger();
17671 }
17672 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17673 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17674}
17675
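// Recognises loads from a constant pool addressed either through an LLA node
// (the PC-relative auipc/addi sequence) or through a HI/ADD_LO pair (the
// absolute lui/addi sequence used by the medlow code model), and returns the
// pooled constant if so.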
17676const Constant *
17677RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17678 assert(Ld && "Unexpected null LoadSDNode");
17679 if (!ISD::isNormalLoad(Ld))
17680 return nullptr;
17681
17682 SDValue Ptr = Ld->getBasePtr();
17683
17684 // Only constant pools with no offset are supported.
17685 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17686 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17687 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17688 CNode->getOffset() != 0)
17689 return nullptr;
17690
17691 return CNode;
17692 };
17693
17694 // Simple case, LLA.
17695 if (Ptr.getOpcode() == RISCVISD::LLA) {
17696 auto *CNode = GetSupportedConstantPool(Ptr);
17697 if (!CNode || CNode->getTargetFlags() != 0)
17698 return nullptr;
17699
17700 return CNode->getConstVal();
17701 }
17702
17703 // Look for a HI and ADD_LO pair.
17704 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17705 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17706 return nullptr;
17707
17708 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17709 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17710
17711 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17712 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17713 return nullptr;
17714
17715 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17716 return nullptr;
17717
17718 return CNodeLo->getConstVal();
17719}
17720
17721static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17722 MachineBasicBlock *BB) {
17723 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17724
17725 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17726 // Should the count have wrapped while it was being read, we need to try
17727 // again.
17728 // For example:
17729 // ```
17730 // read:
17731 // csrrs x3, counterh # load high word of counter
17732 // csrrs x2, counter # load low word of counter
17733 // csrrs x4, counterh # load high word of counter
17734 // bne x3, x4, read # check if high word reads match, otherwise try again
17735 // ```
17736
17737 MachineFunction &MF = *BB->getParent();
17738 const BasicBlock *LLVMBB = BB->getBasicBlock();
17739 MachineFunction::iterator It = ++BB->getIterator();
17740
17741 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17742 MF.insert(It, LoopMBB);
17743
17744 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17745 MF.insert(It, DoneMBB);
17746
17747 // Transfer the remainder of BB and its successor edges to DoneMBB.
17748 DoneMBB->splice(DoneMBB->begin(), BB,
17749 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17750 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17751
17752 BB->addSuccessor(LoopMBB);
17753
17754 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17755 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17756 Register LoReg = MI.getOperand(0).getReg();
17757 Register HiReg = MI.getOperand(1).getReg();
17758 int64_t LoCounter = MI.getOperand(2).getImm();
17759 int64_t HiCounter = MI.getOperand(3).getImm();
17760 DebugLoc DL = MI.getDebugLoc();
17761
17762 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17763 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17764 .addImm(HiCounter)
17765 .addReg(RISCV::X0);
17766 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17767 .addImm(LoCounter)
17768 .addReg(RISCV::X0);
17769 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17770 .addImm(HiCounter)
17771 .addReg(RISCV::X0);
17772
17773 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17774 .addReg(HiReg)
17775 .addReg(ReadAgainReg)
17776 .addMBB(LoopMBB);
17777
17778 LoopMBB->addSuccessor(LoopMBB);
17779 LoopMBB->addSuccessor(DoneMBB);
17780
17781 MI.eraseFromParent();
17782
17783 return DoneMBB;
17784}
17785
17786static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17787 MachineBasicBlock *BB,
17788 const RISCVSubtarget &Subtarget) {
17789 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17790
17791 MachineFunction &MF = *BB->getParent();
17792 DebugLoc DL = MI.getDebugLoc();
17795 Register LoReg = MI.getOperand(0).getReg();
17796 Register HiReg = MI.getOperand(1).getReg();
17797 Register SrcReg = MI.getOperand(2).getReg();
17798
17799 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17800 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17801
17802 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17803 RI, Register());
17805 MachineMemOperand *MMOLo =
17809 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17810 .addFrameIndex(FI)
17811 .addImm(0)
17812 .addMemOperand(MMOLo);
17813 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17814 .addFrameIndex(FI)
17815 .addImm(4)
17816 .addMemOperand(MMOHi);
17817 MI.eraseFromParent(); // The pseudo instruction is gone now.
17818 return BB;
17819}
17820
17821static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17822 MachineBasicBlock *BB,
17823 const RISCVSubtarget &Subtarget) {
17824 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17825 "Unexpected instruction");
17826
17827 MachineFunction &MF = *BB->getParent();
17828 DebugLoc DL = MI.getDebugLoc();
17831 Register DstReg = MI.getOperand(0).getReg();
17832 Register LoReg = MI.getOperand(1).getReg();
17833 Register HiReg = MI.getOperand(2).getReg();
17834
17835 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17836 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17837
17839 MachineMemOperand *MMOLo =
17843 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17844 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17845 .addFrameIndex(FI)
17846 .addImm(0)
17847 .addMemOperand(MMOLo);
17848 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17849 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17850 .addFrameIndex(FI)
17851 .addImm(4)
17852 .addMemOperand(MMOHi);
17853 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17854 MI.eraseFromParent(); // The pseudo instruction is gone now.
17855 return BB;
17856}
17857
17858static bool isSelectPseudo(MachineInstr &MI) {
17859 switch (MI.getOpcode()) {
17860 default:
17861 return false;
17862 case RISCV::Select_GPR_Using_CC_GPR:
17863 case RISCV::Select_GPR_Using_CC_Imm:
17864 case RISCV::Select_FPR16_Using_CC_GPR:
17865 case RISCV::Select_FPR16INX_Using_CC_GPR:
17866 case RISCV::Select_FPR32_Using_CC_GPR:
17867 case RISCV::Select_FPR32INX_Using_CC_GPR:
17868 case RISCV::Select_FPR64_Using_CC_GPR:
17869 case RISCV::Select_FPR64INX_Using_CC_GPR:
17870 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17871 return true;
17872 }
17873}
17874
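// The emitted sequence is roughly (for PseudoQuietFLT_S, registers
// illustrative):
//   frflags t0 ; flt.s rd, rs1, rs2 ; fsflags t0 ; feq.s x0, rs1, rs2
// The signaling comparison's spurious invalid flag is discarded by restoring
// FFLAGS, and the trailing quiet feq raises invalid only for signaling NaNs.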
17875static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17876 unsigned RelOpcode, unsigned EqOpcode,
17877 const RISCVSubtarget &Subtarget) {
17878 DebugLoc DL = MI.getDebugLoc();
17879 Register DstReg = MI.getOperand(0).getReg();
17880 Register Src1Reg = MI.getOperand(1).getReg();
17881 Register Src2Reg = MI.getOperand(2).getReg();
17883 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17885
17886 // Save the current FFLAGS.
17887 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17888
17889 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17890 .addReg(Src1Reg)
17891 .addReg(Src2Reg);
17894
17895 // Restore the FFLAGS.
17896 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17897 .addReg(SavedFFlags, RegState::Kill);
17898
17899 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17900 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17901 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17902 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17905
17906 // Erase the pseudoinstruction.
17907 MI.eraseFromParent();
17908 return BB;
17909}
17910
17911static MachineBasicBlock *
17912EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17913 MachineBasicBlock *ThisMBB,
17914 const RISCVSubtarget &Subtarget) {
17915 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
17916 // Without this, custom-inserter would have generated:
17917 //
17918 // A
17919 // | \
17920 // | B
17921 // | /
17922 // C
17923 // | \
17924 // | D
17925 // | /
17926 // E
17927 //
17928 // A: X = ...; Y = ...
17929 // B: empty
17930 // C: Z = PHI [X, A], [Y, B]
17931 // D: empty
17932 // E: PHI [X, C], [Z, D]
17933 //
17934 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17935 //
17936 // A
17937 // | \
17938 // | C
17939 // | /|
17940 // |/ |
17941 // | |
17942 // | D
17943 // | /
17944 // E
17945 //
17946 // A: X = ...; Y = ...
17947 // D: empty
17948 // E: PHI [X, A], [X, C], [Y, D]
17949
17950 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17951 const DebugLoc &DL = First.getDebugLoc();
17952 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17953 MachineFunction *F = ThisMBB->getParent();
17954 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17955 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17956 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17957 MachineFunction::iterator It = ++ThisMBB->getIterator();
17958 F->insert(It, FirstMBB);
17959 F->insert(It, SecondMBB);
17960 F->insert(It, SinkMBB);
17961
17962 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17963 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17965 ThisMBB->end());
17966 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17967
17968 // Fallthrough block for ThisMBB.
17969 ThisMBB->addSuccessor(FirstMBB);
17970 // Fallthrough block for FirstMBB.
17971 FirstMBB->addSuccessor(SecondMBB);
17972 ThisMBB->addSuccessor(SinkMBB);
17973 FirstMBB->addSuccessor(SinkMBB);
17974 // This is fallthrough.
17975 SecondMBB->addSuccessor(SinkMBB);
17976
17977 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17978 Register FLHS = First.getOperand(1).getReg();
17979 Register FRHS = First.getOperand(2).getReg();
17980 // Insert appropriate branch.
17981 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17982 .addReg(FLHS)
17983 .addReg(FRHS)
17984 .addMBB(SinkMBB);
17985
17986 Register SLHS = Second.getOperand(1).getReg();
17987 Register SRHS = Second.getOperand(2).getReg();
17988 Register Op1Reg4 = First.getOperand(4).getReg();
17989 Register Op1Reg5 = First.getOperand(5).getReg();
17990
17991 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17992 // Insert appropriate branch.
17993 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17994 .addReg(SLHS)
17995 .addReg(SRHS)
17996 .addMBB(SinkMBB);
17997
17998 Register DestReg = Second.getOperand(0).getReg();
17999 Register Op2Reg4 = Second.getOperand(4).getReg();
18000 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
18001 .addReg(Op2Reg4)
18002 .addMBB(ThisMBB)
18003 .addReg(Op1Reg4)
18004 .addMBB(FirstMBB)
18005 .addReg(Op1Reg5)
18006 .addMBB(SecondMBB);
18007
18008 // Now remove the Select_FPRX_s.
18009 First.eraseFromParent();
18010 Second.eraseFromParent();
18011 return SinkMBB;
18012}
18013
18014static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
18015 MachineBasicBlock *BB,
18016 const RISCVSubtarget &Subtarget) {
18017 // To "insert" Select_* instructions, we actually have to insert the triangle
18018 // control-flow pattern. The incoming instructions know the destination vreg
18019 // to set, the condition code register to branch on, the true/false values to
18020 // select between, and the condcode to use to select the appropriate branch.
18021 //
18022 // We produce the following control flow:
18023 // HeadMBB
18024 // | \
18025 // | IfFalseMBB
18026 // | /
18027 // TailMBB
18028 //
18029 // When we find a sequence of selects we attempt to optimize their emission
18030 // by sharing the control flow. Currently we only handle cases where we have
18031 // multiple selects with the exact same condition (same LHS, RHS and CC).
18032 // The selects may be interleaved with other instructions if the other
18033 // instructions meet some requirements we deem safe:
18034 // - They are not pseudo instructions.
18035 // - They are debug instructions. Otherwise,
18036 // - They do not have side-effects, do not access memory and their inputs do
18037 // not depend on the results of the select pseudo-instructions.
18038 // The TrueV/FalseV operands of the selects cannot depend on the result of
18039 // previous selects in the sequence.
18040 // These conditions could be further relaxed. See the X86 target for a
18041 // related approach and more information.
18042 //
18043 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18044 // is checked here and handled by a separate function -
18045 // EmitLoweredCascadedSelect.
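 // For example (illustrative): two selects guarded by the same (x < y)
 // condition, "a = x < y ? p : q" and "b = x < y ? r : s", share a single
 // conditional branch here and become two PHIs at the head of TailMBB.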
18046
18047 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
18048 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
18049 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
18050 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
18051 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
18052 Next->getOperand(5).isKill())
18053 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
18054
18055 Register LHS = MI.getOperand(1).getReg();
18056 Register RHS;
18057 if (MI.getOperand(2).isReg())
18058 RHS = MI.getOperand(2).getReg();
18059 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
18060
18061 SmallVector<MachineInstr *, 4> SelectDebugValues;
18062 SmallSet<Register, 4> SelectDests;
18063 SelectDests.insert(MI.getOperand(0).getReg());
18064
18065 MachineInstr *LastSelectPseudo = &MI;
18066 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
18067 SequenceMBBI != E; ++SequenceMBBI) {
18068 if (SequenceMBBI->isDebugInstr())
18069 continue;
18070 if (isSelectPseudo(*SequenceMBBI)) {
18071 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
18072 !SequenceMBBI->getOperand(2).isReg() ||
18073 SequenceMBBI->getOperand(2).getReg() != RHS ||
18074 SequenceMBBI->getOperand(3).getImm() != CC ||
18075 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
18076 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
18077 break;
18078 LastSelectPseudo = &*SequenceMBBI;
18079 SequenceMBBI->collectDebugValues(SelectDebugValues);
18080 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
18081 continue;
18082 }
18083 if (SequenceMBBI->hasUnmodeledSideEffects() ||
18084 SequenceMBBI->mayLoadOrStore() ||
18085 SequenceMBBI->usesCustomInsertionHook())
18086 break;
18087 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
18088 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
18089 }))
18090 break;
18091 }
18092
18093 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18094 const BasicBlock *LLVM_BB = BB->getBasicBlock();
18095 DebugLoc DL = MI.getDebugLoc();
18096 MachineFunction::iterator I = ++BB->getIterator();
18097
18098 MachineBasicBlock *HeadMBB = BB;
18099 MachineFunction *F = BB->getParent();
18100 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
18101 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
18102
18103 F->insert(I, IfFalseMBB);
18104 F->insert(I, TailMBB);
18105
18106 // Set the call frame size on entry to the new basic blocks.
18107 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
18108 IfFalseMBB->setCallFrameSize(CallFrameSize);
18109 TailMBB->setCallFrameSize(CallFrameSize);
18110
18111 // Transfer debug instructions associated with the selects to TailMBB.
18112 for (MachineInstr *DebugInstr : SelectDebugValues) {
18113 TailMBB->push_back(DebugInstr->removeFromParent());
18114 }
18115
18116 // Move all instructions after the sequence to TailMBB.
18117 TailMBB->splice(TailMBB->end(), HeadMBB,
18118 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
18119 // Update machine-CFG edges by transferring all successors of the current
18120 // block to the new block which will contain the Phi nodes for the selects.
18121 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
18122 // Set the successors for HeadMBB.
18123 HeadMBB->addSuccessor(IfFalseMBB);
18124 HeadMBB->addSuccessor(TailMBB);
18125
18126 // Insert appropriate branch.
18127 if (MI.getOperand(2).isImm())
18128 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
18129 .addReg(LHS)
18130 .addImm(MI.getOperand(2).getImm())
18131 .addMBB(TailMBB);
18132 else
18133 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
18134 .addReg(LHS)
18135 .addReg(RHS)
18136 .addMBB(TailMBB);
18137
18138 // IfFalseMBB just falls through to TailMBB.
18139 IfFalseMBB->addSuccessor(TailMBB);
18140
18141 // Create PHIs for all of the select pseudo-instructions.
18142 auto SelectMBBI = MI.getIterator();
18143 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
18144 auto InsertionPoint = TailMBB->begin();
18145 while (SelectMBBI != SelectEnd) {
18146 auto Next = std::next(SelectMBBI);
18147 if (isSelectPseudo(*SelectMBBI)) {
18148 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
18149 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
18150 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
18151 .addReg(SelectMBBI->getOperand(4).getReg())
18152 .addMBB(HeadMBB)
18153 .addReg(SelectMBBI->getOperand(5).getReg())
18154 .addMBB(IfFalseMBB);
18155 SelectMBBI->eraseFromParent();
18156 }
18157 SelectMBBI = Next;
18158 }
18159
18160 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
18161 return TailMBB;
18162}
18163
18164// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
18165static const RISCV::RISCVMaskedPseudoInfo *
18166lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
18168 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
18169 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
18171 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
18172 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
18173 return Masked;
18174}
18175
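// The masked expansion produced below is roughly:
//   frflags t0
//   vfcvt.x.f.v vtmp, vsrc, v0.t   (frm = DYN)
//   vfcvt.f.x.v vdst, vtmp, v0.t   (frm = DYN)
//   fsflags t0
// rounding each active element to an integral value in the current rounding
// mode without leaving any exception flags (such as inexact) set.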
18176static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
18177 MachineBasicBlock *BB,
18178 unsigned CVTXOpc) {
18179 DebugLoc DL = MI.getDebugLoc();
18180
18182
18184 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18185
18186 // Save the old value of FFLAGS.
18187 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
18188
18189 assert(MI.getNumOperands() == 7);
18190
18191 // Emit a VFCVT_X_F
18192 const TargetRegisterInfo *TRI =
18194 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
18195 Register Tmp = MRI.createVirtualRegister(RC);
18196 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
18197 .add(MI.getOperand(1))
18198 .add(MI.getOperand(2))
18199 .add(MI.getOperand(3))
18200 .add(MachineOperand::CreateImm(7)) // frm = DYN
18201 .add(MI.getOperand(4))
18202 .add(MI.getOperand(5))
18203 .add(MI.getOperand(6))
18204 .add(MachineOperand::CreateReg(RISCV::FRM,
18205 /*IsDef*/ false,
18206 /*IsImp*/ true));
18207
18208 // Emit a VFCVT_F_X
18209 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
18210 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
18211 // There is no E8 variant for VFCVT_F_X.
18212 assert(Log2SEW >= 4);
18213 unsigned CVTFOpc =
18214 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
18215 ->MaskedPseudo;
18216
18217 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
18218 .add(MI.getOperand(0))
18219 .add(MI.getOperand(1))
18220 .addReg(Tmp)
18221 .add(MI.getOperand(3))
18222 .add(MachineOperand::CreateImm(7)) // frm = DYN
18223 .add(MI.getOperand(4))
18224 .add(MI.getOperand(5))
18225 .add(MI.getOperand(6))
18226 .add(MachineOperand::CreateReg(RISCV::FRM,
18227 /*IsDef*/ false,
18228 /*IsImp*/ true));
18229
18230 // Restore FFLAGS.
18231 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18232 .addReg(SavedFFLAGS, RegState::Kill);
18233
18234 // Erase the pseudoinstruction.
18235 MI.eraseFromParent();
18236 return BB;
18237}
18238
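// The expansion produced below is roughly (for PseudoFROUND_S, registers
// illustrative):
//   fsgnjx.s ft0, fsrc, fsrc      # fabs(src)
//   flt.s    t0, ft0, fmax        # values >= fmax (and NaNs) need no rounding
//   beq      t0, x0, done
//   fcvt.w.s t1, fsrc, rm         # round to integer in the requested mode
//   fcvt.s.w ft1, t1, rm
//   fsgnj.s  fcvt, ft1, fsrc      # reattach the original sign
// done:
//   result = phi(fsrc, fcvt)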
18239static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
18240 const RISCVSubtarget &Subtarget) {
18241 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
18242 const TargetRegisterClass *RC;
18243 switch (MI.getOpcode()) {
18244 default:
18245 llvm_unreachable("Unexpected opcode");
18246 case RISCV::PseudoFROUND_H:
18247 CmpOpc = RISCV::FLT_H;
18248 F2IOpc = RISCV::FCVT_W_H;
18249 I2FOpc = RISCV::FCVT_H_W;
18250 FSGNJOpc = RISCV::FSGNJ_H;
18251 FSGNJXOpc = RISCV::FSGNJX_H;
18252 RC = &RISCV::FPR16RegClass;
18253 break;
18254 case RISCV::PseudoFROUND_H_INX:
18255 CmpOpc = RISCV::FLT_H_INX;
18256 F2IOpc = RISCV::FCVT_W_H_INX;
18257 I2FOpc = RISCV::FCVT_H_W_INX;
18258 FSGNJOpc = RISCV::FSGNJ_H_INX;
18259 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18260 RC = &RISCV::GPRF16RegClass;
18261 break;
18262 case RISCV::PseudoFROUND_S:
18263 CmpOpc = RISCV::FLT_S;
18264 F2IOpc = RISCV::FCVT_W_S;
18265 I2FOpc = RISCV::FCVT_S_W;
18266 FSGNJOpc = RISCV::FSGNJ_S;
18267 FSGNJXOpc = RISCV::FSGNJX_S;
18268 RC = &RISCV::FPR32RegClass;
18269 break;
18270 case RISCV::PseudoFROUND_S_INX:
18271 CmpOpc = RISCV::FLT_S_INX;
18272 F2IOpc = RISCV::FCVT_W_S_INX;
18273 I2FOpc = RISCV::FCVT_S_W_INX;
18274 FSGNJOpc = RISCV::FSGNJ_S_INX;
18275 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18276 RC = &RISCV::GPRF32RegClass;
18277 break;
18278 case RISCV::PseudoFROUND_D:
18279 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18280 CmpOpc = RISCV::FLT_D;
18281 F2IOpc = RISCV::FCVT_L_D;
18282 I2FOpc = RISCV::FCVT_D_L;
18283 FSGNJOpc = RISCV::FSGNJ_D;
18284 FSGNJXOpc = RISCV::FSGNJX_D;
18285 RC = &RISCV::FPR64RegClass;
18286 break;
18287 case RISCV::PseudoFROUND_D_INX:
18288 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18289 CmpOpc = RISCV::FLT_D_INX;
18290 F2IOpc = RISCV::FCVT_L_D_INX;
18291 I2FOpc = RISCV::FCVT_D_L_INX;
18292 FSGNJOpc = RISCV::FSGNJ_D_INX;
18293 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18294 RC = &RISCV::GPRRegClass;
18295 break;
18296 }
18297
18298 const BasicBlock *BB = MBB->getBasicBlock();
18299 DebugLoc DL = MI.getDebugLoc();
18301
18303 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18304 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18305
18306 F->insert(I, CvtMBB);
18307 F->insert(I, DoneMBB);
18308 // Move all instructions after the sequence to DoneMBB.
18309 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18310 MBB->end());
18311 // Update machine-CFG edges by transferring all successors of the current
18312 // block to the new block which will contain the Phi nodes for the selects.
18313 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
18314 // Set the successors for MBB.
18315 MBB->addSuccessor(CvtMBB);
18316 MBB->addSuccessor(DoneMBB);
18317
18318 Register DstReg = MI.getOperand(0).getReg();
18319 Register SrcReg = MI.getOperand(1).getReg();
18320 Register MaxReg = MI.getOperand(2).getReg();
18321 int64_t FRM = MI.getOperand(3).getImm();
18322
18323 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18325
18326 Register FabsReg = MRI.createVirtualRegister(RC);
18327 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18328
18329 // Compare the FP value to the max value.
18330 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18331 auto MIB =
18332 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18335
18336 // Insert branch.
18337 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18338 .addReg(CmpReg)
18339 .addReg(RISCV::X0)
18340 .addMBB(DoneMBB);
18341
18342 CvtMBB->addSuccessor(DoneMBB);
18343
18344 // Convert to integer.
18345 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18346 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18349
18350 // Convert back to FP.
18351 Register I2FReg = MRI.createVirtualRegister(RC);
18352 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18355
18356 // Restore the sign bit.
18357 Register CvtReg = MRI.createVirtualRegister(RC);
18358 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18359
18360 // Merge the results.
18361 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18362 .addReg(SrcReg)
18363 .addMBB(MBB)
18364 .addReg(CvtReg)
18365 .addMBB(CvtMBB);
18366
18367 MI.eraseFromParent();
18368 return DoneMBB;
18369}
18370
18371MachineBasicBlock *
18372RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18373 MachineBasicBlock *BB) const {
18374 switch (MI.getOpcode()) {
18375 default:
18376 llvm_unreachable("Unexpected instr type to insert");
18377 case RISCV::ReadCounterWide:
18378 assert(!Subtarget.is64Bit() &&
18379 "ReadCounterWide is only to be used on riscv32");
18380 return emitReadCounterWidePseudo(MI, BB);
18381 case RISCV::Select_GPR_Using_CC_GPR:
18382 case RISCV::Select_GPR_Using_CC_Imm:
18383 case RISCV::Select_FPR16_Using_CC_GPR:
18384 case RISCV::Select_FPR16INX_Using_CC_GPR:
18385 case RISCV::Select_FPR32_Using_CC_GPR:
18386 case RISCV::Select_FPR32INX_Using_CC_GPR:
18387 case RISCV::Select_FPR64_Using_CC_GPR:
18388 case RISCV::Select_FPR64INX_Using_CC_GPR:
18389 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18390 return emitSelectPseudo(MI, BB, Subtarget);
18391 case RISCV::BuildPairF64Pseudo:
18392 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18393 case RISCV::SplitF64Pseudo:
18394 return emitSplitF64Pseudo(MI, BB, Subtarget);
18395 case RISCV::PseudoQuietFLE_H:
18396 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18397 case RISCV::PseudoQuietFLE_H_INX:
18398 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18399 case RISCV::PseudoQuietFLT_H:
18400 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18401 case RISCV::PseudoQuietFLT_H_INX:
18402 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18403 case RISCV::PseudoQuietFLE_S:
18404 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18405 case RISCV::PseudoQuietFLE_S_INX:
18406 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18407 case RISCV::PseudoQuietFLT_S:
18408 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18409 case RISCV::PseudoQuietFLT_S_INX:
18410 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18411 case RISCV::PseudoQuietFLE_D:
18412 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18413 case RISCV::PseudoQuietFLE_D_INX:
18414 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18415 case RISCV::PseudoQuietFLE_D_IN32X:
18416 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18417 Subtarget);
18418 case RISCV::PseudoQuietFLT_D:
18419 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18420 case RISCV::PseudoQuietFLT_D_INX:
18421 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18422 case RISCV::PseudoQuietFLT_D_IN32X:
18423 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18424 Subtarget);
18425
18426 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18427 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18428 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18429 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18430 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18431 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18432 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18433 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18434 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18435 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18436 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18437 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18438 case RISCV::PseudoFROUND_H:
18439 case RISCV::PseudoFROUND_H_INX:
18440 case RISCV::PseudoFROUND_S:
18441 case RISCV::PseudoFROUND_S_INX:
18442 case RISCV::PseudoFROUND_D:
18443 case RISCV::PseudoFROUND_D_INX:
18444 case RISCV::PseudoFROUND_D_IN32X:
18445 return emitFROUND(MI, BB, Subtarget);
18446 case TargetOpcode::STATEPOINT:
18447 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
18448 // while the jal call instruction (to which the statepoint is lowered at
18449 // the end) has an implicit def. This def is early-clobber as it is set
18450 // at the moment of the call, earlier than any use is read.
18451 // Add this implicit dead def here as a workaround.
18452 MI.addOperand(*MI.getMF(),
18454 RISCV::X1, /*isDef*/ true,
18455 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18456 /*isUndef*/ false, /*isEarlyClobber*/ true));
18457 [[fallthrough]];
18458 case TargetOpcode::STACKMAP:
18459 case TargetOpcode::PATCHPOINT:
18460 if (!Subtarget.is64Bit())
18461 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18462 "supported on 64-bit targets");
18463 return emitPatchPoint(MI, BB);
18464 }
18465}
18466
18467void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18468 SDNode *Node) const {
18469 // Add FRM dependency to any instructions with dynamic rounding mode.
18470 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18471 if (Idx < 0) {
18472 // Vector pseudos have FRM index indicated by TSFlags.
18473 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18474 if (Idx < 0)
18475 return;
18476 }
18477 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18478 return;
18479 // If the instruction already reads FRM, don't add another read.
18480 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18481 return;
18482 MI.addOperand(
18483 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18484}
18485
18486// Calling Convention Implementation.
18487// The expectations for frontend ABI lowering vary from target to target.
18488// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18489// details, but this is a longer term goal. For now, we simply try to keep the
18490// role of the frontend as simple and well-defined as possible. The rules can
18491// be summarised as:
18492// * Never split up large scalar arguments. We handle them here.
18493// * If a hardfloat calling convention is being used, and the struct may be
18494// passed in a pair of registers (fp+fp, int+fp), and both registers are
18495// available, then pass as two separate arguments. If either the GPRs or FPRs
18496// are exhausted, then pass according to the rule below.
18497// * If a struct could never be passed in registers or directly in a stack
18498// slot (as it is larger than 2*XLEN and the floating point rules don't
18499// apply), then pass it using a pointer with the byval attribute.
18500// * If a struct is less than 2*XLEN, then coerce to either a two-element
18501// word-sized array or a 2*XLEN scalar (depending on alignment).
18502// * The frontend can determine whether a struct is returned by reference or
18503// not based on its size and fields. If it will be returned by reference, the
18504// frontend must modify the prototype so a pointer with the sret annotation is
18505// passed as the first argument. This is not necessary for large scalar
18506// returns.
18507// * Struct return values and varargs should be coerced to structs containing
18508// register-size fields in the same situations they would be for fixed
18509// arguments.
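// For example (illustrative): under the hard-float ilp32f/ilp32d ABIs a
// struct of two floats is passed in two FPRs when both are free, while a
// 2*XLEN integer such as i64 on RV32 is split across a pair of GPRs (or a
// GPR and the stack once registers run out), as handled by
// CC_RISCVAssign2XLen below.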
18510
18511static const MCPhysReg ArgFPR16s[] = {
18512 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18513 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18514};
18515static const MCPhysReg ArgFPR32s[] = {
18516 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18517 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18518};
18519static const MCPhysReg ArgFPR64s[] = {
18520 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18521 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18522};
18523// This is an interim calling convention and it may be changed in the future.
18524static const MCPhysReg ArgVRs[] = {
18525 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18526 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18527 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18528static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18529 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18530 RISCV::V20M2, RISCV::V22M2};
18531static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18532 RISCV::V20M4};
18533static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18534
18536 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18537 // the ILP32E ABI.
18538 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18539 RISCV::X13, RISCV::X14, RISCV::X15,
18540 RISCV::X16, RISCV::X17};
18541 // The GPRs used for passing arguments in the ILP32E/LP64E ABI.
18542 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18543 RISCV::X13, RISCV::X14, RISCV::X15};
18544
18545 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18546 return ArrayRef(ArgEGPRs);
18547
18548 return ArrayRef(ArgIGPRs);
18549}
18550
18552 // The GPRs used for passing arguments in the FastCC. X5 and X6 might be used
18553 // for the save-restore libcall, so we don't use them.
18554 // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
18555 static const MCPhysReg FastCCIGPRs[] = {
18556 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
18557 RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};
18558
18559 // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
18560 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18561 RISCV::X13, RISCV::X14, RISCV::X15};
18562
18563 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18564 return ArrayRef(FastCCEGPRs);
18565
18566 return ArrayRef(FastCCIGPRs);
18567}
18568
18569// Pass a 2*XLEN argument that has been split into two XLEN values through
18570// registers or the stack as necessary.
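// For example (illustrative): when only one argument GPR remains, the first
// half is passed in that register and the second half in a stack slot; when
// none remain, both halves are passed on the stack with the alignment
// handling below.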
18571static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18572 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18573 MVT ValVT2, MVT LocVT2,
18574 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18575 unsigned XLenInBytes = XLen / 8;
18576 const RISCVSubtarget &STI =
18579
18580 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18581 // At least one half can be passed via register.
18582 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18583 VA1.getLocVT(), CCValAssign::Full));
18584 } else {
18585 // Both halves must be passed on the stack, with proper alignment.
18586 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18587 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18588 Align StackAlign(XLenInBytes);
18589 if (!EABI || XLen != 32)
18590 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18591 State.addLoc(
18592 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18593 State.AllocateStack(XLenInBytes, StackAlign),
18594 VA1.getLocVT(), CCValAssign::Full));
18595 State.addLoc(CCValAssign::getMem(
18596 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18597 LocVT2, CCValAssign::Full));
18598 return false;
18599 }
18600
18601 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18602 // The second half can also be passed via register.
18603 State.addLoc(
18604 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18605 } else {
18606 // The second half is passed via the stack, without additional alignment.
18608 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18609 LocVT2, CCValAssign::Full));
18610 }
18611
18612 return false;
18613}
18614
18615// Implements the RISC-V calling convention. Returns true upon failure.
18616bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18617 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18618 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18619 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18620 RVVArgDispatcher &RVVDispatcher) {
18621 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18622 assert(XLen == 32 || XLen == 64);
18623 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18624
18625 // Static chain parameter must not be passed in normal argument registers,
18626 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18627 if (ArgFlags.isNest()) {
18628 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18629 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18630 return false;
18631 }
18632 }
18633
18634 // Any return value split in to more than two values can't be returned
18635 // directly. Vectors are returned via the available vector registers.
18636 if (!LocVT.isVector() && IsRet && ValNo > 1)
18637 return true;
18638
18639 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18640 // variadic argument, or if no F16/F32 argument registers are available.
18641 bool UseGPRForF16_F32 = true;
18642 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18643 // variadic argument, or if no F64 argument registers are available.
18644 bool UseGPRForF64 = true;
18645
18646 switch (ABI) {
18647 default:
18648 llvm_unreachable("Unexpected ABI");
18649 case RISCVABI::ABI_ILP32:
18650 case RISCVABI::ABI_ILP32E:
18651 case RISCVABI::ABI_LP64:
18652 case RISCVABI::ABI_LP64E:
18653 break;
18654 case RISCVABI::ABI_ILP32F:
18655 case RISCVABI::ABI_LP64F:
18656 UseGPRForF16_F32 = !IsFixed;
18657 break;
18658 case RISCVABI::ABI_ILP32D:
18659 case RISCVABI::ABI_LP64D:
18660 UseGPRForF16_F32 = !IsFixed;
18661 UseGPRForF64 = !IsFixed;
18662 break;
18663 }
18664
18665 // FPR16, FPR32, and FPR64 alias each other.
18666 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18667 UseGPRForF16_F32 = true;
18668 UseGPRForF64 = true;
18669 }
18670
18671 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18672 // similar local variables rather than directly checking against the target
18673 // ABI.
18674
18675 if (UseGPRForF16_F32 &&
18676 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18677 LocVT = XLenVT;
18678 LocInfo = CCValAssign::BCvt;
18679 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18680 LocVT = MVT::i64;
18681 LocInfo = CCValAssign::BCvt;
18682 }
18683
18685
18686 // If this is a variadic argument, the RISC-V calling convention requires
18687 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18688 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18689 // be used regardless of whether the original argument was split during
18690 // legalisation or not. The argument will not be passed by registers if the
18691 // original type is larger than 2*XLEN, so the register alignment rule does
18692 // not apply.
18693 // TODO: To be compatible with GCC's behaviors, we don't align registers
18694 // currently if we are using ILP32E calling convention. This behavior may be
18695 // changed when RV32E/ILP32E is ratified.
18696 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18697 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18698 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18699 ABI != RISCVABI::ABI_ILP32E) {
18700 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18701 // Skip 'odd' register if necessary.
18702 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18703 State.AllocateReg(ArgGPRs);
18704 }
18705
18706 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18707 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18708 State.getPendingArgFlags();
18709
18710 assert(PendingLocs.size() == PendingArgFlags.size() &&
18711 "PendingLocs and PendingArgFlags out of sync");
18712
18713 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18714 // registers are exhausted.
18715 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18716 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18717 // Depending on available argument GPRS, f64 may be passed in a pair of
18718 // GPRs, split between a GPR and the stack, or passed completely on the
18719 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18720 // cases.
18721 Register Reg = State.AllocateReg(ArgGPRs);
18722 if (!Reg) {
18723 unsigned StackOffset = State.AllocateStack(8, Align(8));
18724 State.addLoc(
18725 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18726 return false;
18727 }
18728 LocVT = MVT::i32;
18729 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18730 Register HiReg = State.AllocateReg(ArgGPRs);
18731 if (HiReg) {
18732 State.addLoc(
18733 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18734 } else {
18735 unsigned StackOffset = State.AllocateStack(4, Align(4));
18736 State.addLoc(
18737 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18738 }
18739 return false;
18740 }
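// Illustration of the three outcomes handled above (assumed register
// pressure, not an exhaustive enumeration): with enough GPRs free the f64 is
// split across a pair such as a4/a5; with only a7 left the low half goes in
// a7 and the high half into a 4-byte stack slot; with no GPRs left the whole
// value lands in an 8-byte-aligned stack slot.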
18741
18742 // Fixed-length vectors are located in the corresponding scalable-vector
18743 // container types.
18744 if (ValVT.isFixedLengthVector())
18745 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18746
18747 // Split arguments might be passed indirectly, so keep track of the pending
18748 // values. Split vectors are passed via a mix of registers and indirectly, so
18749 // treat them as we would any other argument.
18750 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18751 LocVT = XLenVT;
18752 LocInfo = CCValAssign::Indirect;
18753 PendingLocs.push_back(
18754 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18755 PendingArgFlags.push_back(ArgFlags);
18756 if (!ArgFlags.isSplitEnd()) {
18757 return false;
18758 }
18759 }
18760
18761 // If the split argument only had two elements, it should be passed directly
18762 // in registers or on the stack.
18763 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18764 PendingLocs.size() <= 2) {
18765 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18766 // Apply the normal calling convention rules to the first half of the
18767 // split argument.
18768 CCValAssign VA = PendingLocs[0];
18769 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18770 PendingLocs.clear();
18771 PendingArgFlags.clear();
18772 return CC_RISCVAssign2XLen(
18773 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18774 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18775 }
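// For example (a sketch of the common case): an i64 argument on RV32, or an
// i128 on RV64, reaches this point as exactly two pending XLen-sized halves;
// CC_RISCVAssign2XLen then places them in a GPR pair, in one GPR plus a
// stack slot, or entirely on the stack.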
18776
18777 // Allocate to a register if possible, or else a stack slot.
18778 Register Reg;
18779 unsigned StoreSizeBytes = XLen / 8;
18780 Align StackAlign = Align(XLen / 8);
18781
18782 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18783 Reg = State.AllocateReg(ArgFPR16s);
18784 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18785 Reg = State.AllocateReg(ArgFPR32s);
18786 else if (ValVT == MVT::f64 && !UseGPRForF64)
18787 Reg = State.AllocateReg(ArgFPR64s);
18788 else if (ValVT.isVector()) {
18789 Reg = RVVDispatcher.getNextPhysReg();
18790 if (!Reg) {
18791 // For return values, the vector must be passed fully via registers or
18792 // via the stack.
18793 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18794 // but we're using all of them.
18795 if (IsRet)
18796 return true;
18797 // Try using a GPR to pass the address
18798 if ((Reg = State.AllocateReg(ArgGPRs))) {
18799 LocVT = XLenVT;
18800 LocInfo = CCValAssign::Indirect;
18801 } else if (ValVT.isScalableVector()) {
18802 LocVT = XLenVT;
18803 LocInfo = CCValAssign::Indirect;
18804 } else {
18805 // Pass fixed-length vectors on the stack.
18806 LocVT = ValVT;
18807 StoreSizeBytes = ValVT.getStoreSize();
18808 // Align vectors to their element sizes, being careful for vXi1
18809 // vectors.
18810 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18811 }
18812 }
18813 } else {
18814 Reg = State.AllocateReg(ArgGPRs);
18815 }
18816
18817 unsigned StackOffset =
18818 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18819
18820 // If we reach this point and PendingLocs is non-empty, we must be at the
18821 // end of a split argument that must be passed indirectly.
18822 if (!PendingLocs.empty()) {
18823 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18824 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18825
18826 for (auto &It : PendingLocs) {
18827 if (Reg)
18828 It.convertToReg(Reg);
18829 else
18830 It.convertToMem(StackOffset);
18831 State.addLoc(It);
18832 }
18833 PendingLocs.clear();
18834 PendingArgFlags.clear();
18835 return false;
18836 }
18837
18838 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18839 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18840 "Expected an XLenVT or vector types at this stage");
18841
18842 if (Reg) {
18843 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18844 return false;
18845 }
18846
18847 // When a scalar floating-point value is passed on the stack, no
18848 // bit-conversion is needed.
18849 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18850 assert(!ValVT.isVector());
18851 LocVT = ValVT;
18852 LocInfo = CCValAssign::Full;
18853 }
18854 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18855 return false;
18856}
18857
18858template <typename ArgTy>
18859static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18860 for (const auto &ArgIdx : enumerate(Args)) {
18861 MVT ArgVT = ArgIdx.value().VT;
18862 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18863 return ArgIdx.index();
18864 }
18865 return std::nullopt;
18866}
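// Usage note (an inference from how the RVV argument dispatcher consumes
// this helper, not something stated here): returning the index of the first
// vector-of-i1 argument lets the dispatcher treat that mask operand
// specially, e.g. steering it towards v0, when handing out vector registers.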
18867
18868void RISCVTargetLowering::analyzeInputArgs(
18869 MachineFunction &MF, CCState &CCInfo,
18870 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18871 RISCVCCAssignFn Fn) const {
18872 unsigned NumArgs = Ins.size();
18873 FunctionType *FType = MF.getFunction().getFunctionType();
18874
18875 RVVArgDispatcher Dispatcher;
18876 if (IsRet) {
18877 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18878 } else {
18879 SmallVector<Type *, 4> TypeList;
18880 for (const Argument &Arg : MF.getFunction().args())
18881 TypeList.push_back(Arg.getType());
18882 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18883 }
18884
18885 for (unsigned i = 0; i != NumArgs; ++i) {
18886 MVT ArgVT = Ins[i].VT;
18887 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18888
18889 Type *ArgTy = nullptr;
18890 if (IsRet)
18891 ArgTy = FType->getReturnType();
18892 else if (Ins[i].isOrigArg())
18893 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18894
18895 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18896 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18897 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18898 Dispatcher)) {
18899 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18900 << ArgVT << '\n');
18901 llvm_unreachable(nullptr);
18902 }
18903 }
18904}
18905
18906void RISCVTargetLowering::analyzeOutputArgs(
18907 MachineFunction &MF, CCState &CCInfo,
18908 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18909 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18910 unsigned NumArgs = Outs.size();
18911
18912 SmallVector<Type *, 4> TypeList;
18913 if (IsRet)
18914 TypeList.push_back(MF.getFunction().getReturnType());
18915 else if (CLI)
18916 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18917 TypeList.push_back(Arg.Ty);
18918 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18919
18920 for (unsigned i = 0; i != NumArgs; i++) {
18921 MVT ArgVT = Outs[i].VT;
18922 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18923 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18924
18925 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18926 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18927 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18928 Dispatcher)) {
18929 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18930 << ArgVT << "\n");
18931 llvm_unreachable(nullptr);
18932 }
18933 }
18934}
18935
18936// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18937// values.
18938 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18939 const CCValAssign &VA, const SDLoc &DL,
18940 const RISCVSubtarget &Subtarget) {
18941 switch (VA.getLocInfo()) {
18942 default:
18943 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18944 case CCValAssign::Full:
18945 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18946 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18947 break;
18948 case CCValAssign::BCvt:
18949 if (VA.getLocVT().isInteger() &&
18950 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18951 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18952 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18953 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18954 } else {
18955 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18956 }
18957 break;
18958 }
18959 return Val;
18960}
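// Example of the BCvt cases above (hedged; the exact nodes depend on the
// subtarget): under LP64 with FP values passed in GPRs, an f32 arrives
// bit-converted in the low 32 bits of an i64 and is recovered with
// FMV_W_X_RV64, an f16/bf16 is recovered with FMV_H_X, and an f64 carried in
// an i64 is a plain BITCAST.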
18961
18962// The caller is responsible for loading the full value if the argument is
18963// passed with CCValAssign::Indirect.
18964 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18965 const CCValAssign &VA, const SDLoc &DL,
18966 const ISD::InputArg &In,
18967 const RISCVTargetLowering &TLI) {
18968 MachineFunction &MF = DAG.getMachineFunction();
18969 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18970 EVT LocVT = VA.getLocVT();
18971 SDValue Val;
18972 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18973 Register VReg = RegInfo.createVirtualRegister(RC);
18974 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18975 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18976
18977 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18978 if (In.isOrigArg()) {
18979 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18980 if (OrigArg->getType()->isIntegerTy()) {
18981 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18982 // An input zero extended from i31 can also be considered sign extended.
18983 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18984 (BitWidth < 32 && In.Flags.isZExt())) {
18985 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18986 RVFI->addSExt32Register(VReg);
18987 }
18988 }
18989 }
18990
18991 if (VA.getLocInfo() == CCValAssign::Indirect)
18992 return Val;
18993
18994 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18995}
18996
18997 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18998 const CCValAssign &VA, const SDLoc &DL,
18999 const RISCVSubtarget &Subtarget) {
19000 EVT LocVT = VA.getLocVT();
19001
19002 switch (VA.getLocInfo()) {
19003 default:
19004 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19005 case CCValAssign::Full:
19006 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19007 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
19008 break;
19009 case CCValAssign::BCvt:
19010 if (LocVT.isInteger() &&
19011 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
19012 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19013 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
19014 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19015 } else {
19016 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19017 }
19018 break;
19019 }
19020 return Val;
19021}
19022
19023// The caller is responsible for loading the full value if the argument is
19024// passed with CCValAssign::Indirect.
19025 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
19026 const CCValAssign &VA, const SDLoc &DL) {
19027 MachineFunction &MF = DAG.getMachineFunction();
19028 MachineFrameInfo &MFI = MF.getFrameInfo();
19029 EVT LocVT = VA.getLocVT();
19030 EVT ValVT = VA.getValVT();
19031 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
19032 if (ValVT.isScalableVector()) {
19033 // When the value is a scalable vector, we save the pointer which points to
19034 // the scalable vector value in the stack. The ValVT will be the pointer
19035 // type, instead of the scalable vector type.
19036 ValVT = LocVT;
19037 }
19038 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19039 /*IsImmutable=*/true);
19040 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19041 SDValue Val;
19042
19043 ISD::LoadExtType ExtType;
19044 switch (VA.getLocInfo()) {
19045 default:
19046 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19047 case CCValAssign::Full:
19048 case CCValAssign::Indirect:
19049 case CCValAssign::BCvt:
19050 ExtType = ISD::NON_EXTLOAD;
19051 break;
19052 }
19053 Val = DAG.getExtLoad(
19054 ExtType, DL, LocVT, Chain, FIN,
19055 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
19056 return Val;
19057}
19058
19059 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19060 const CCValAssign &VA,
19061 const CCValAssign &HiVA,
19062 const SDLoc &DL) {
19063 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19064 "Unexpected VA");
19065 MachineFunction &MF = DAG.getMachineFunction();
19066 MachineFrameInfo &MFI = MF.getFrameInfo();
19067 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19068
19069 assert(VA.isRegLoc() && "Expected register VA assignment");
19070
19071 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19072 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19073 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19074 SDValue Hi;
19075 if (HiVA.isMemLoc()) {
19076 // Second half of f64 is passed on the stack.
19077 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19078 /*IsImmutable=*/true);
19079 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19080 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19081 MachinePointerInfo::getFixedStack(MF, FI));
19082 } else {
19083 // Second half of f64 is passed in another GPR.
19084 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19085 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19086 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19087 }
19088 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19089}
19090
19091 // FastCC gives less than a 1% performance improvement for some particular
19092 // benchmarks, but in theory it may benefit other cases.
19093 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
19094 unsigned ValNo, MVT ValVT, MVT LocVT,
19095 CCValAssign::LocInfo LocInfo,
19096 ISD::ArgFlagsTy ArgFlags, CCState &State,
19097 bool IsFixed, bool IsRet, Type *OrigTy,
19098 const RISCVTargetLowering &TLI,
19099 RVVArgDispatcher &RVVDispatcher) {
19100 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19101 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19102 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19103 return false;
19104 }
19105 }
19106
19107 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
19108
19109 if (LocVT == MVT::f16 &&
19110 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
19111 static const MCPhysReg FPR16List[] = {
19112 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
19113 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
19114 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
19115 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
19116 if (unsigned Reg = State.AllocateReg(FPR16List)) {
19117 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19118 return false;
19119 }
19120 }
19121
19122 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19123 static const MCPhysReg FPR32List[] = {
19124 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
19125 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
19126 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
19127 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
19128 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19129 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19130 return false;
19131 }
19132 }
19133
19134 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19135 static const MCPhysReg FPR64List[] = {
19136 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
19137 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
19138 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
19139 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
19140 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19141 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19142 return false;
19143 }
19144 }
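// Note on the three register lists above (an observation, not normative ABI
// text): each names the standard FP argument registers fa0-fa7 first and
// then the caller-saved temporaries ft0-ft7 and ft8-ft11, so FastCC can keep
// more floating-point values in registers than the standard convention
// before falling back to GPRs or the stack.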
19145
19146 // Check if there is an available GPR before hitting the stack.
19147 if ((LocVT == MVT::f16 &&
19148 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
19149 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19150 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
19151 Subtarget.hasStdExtZdinx())) {
19152 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19153 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19154 return false;
19155 }
19156 }
19157
19158 if (LocVT == MVT::f16) {
19159 unsigned Offset2 = State.AllocateStack(2, Align(2));
19160 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
19161 return false;
19162 }
19163
19164 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
19165 unsigned Offset4 = State.AllocateStack(4, Align(4));
19166 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
19167 return false;
19168 }
19169
19170 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
19171 unsigned Offset5 = State.AllocateStack(8, Align(8));
19172 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
19173 return false;
19174 }
19175
19176 if (LocVT.isVector()) {
19177 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
19178 if (AllocatedVReg) {
19179 // Fixed-length vectors are located in the corresponding scalable-vector
19180 // container types.
19181 if (ValVT.isFixedLengthVector())
19182 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19183 State.addLoc(
19184 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
19185 } else {
19186 // Try and pass the address via a "fast" GPR.
19187 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19188 LocInfo = CCValAssign::Indirect;
19189 LocVT = TLI.getSubtarget().getXLenVT();
19190 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
19191 } else if (ValVT.isFixedLengthVector()) {
19192 auto StackAlign =
19193 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19194 unsigned StackOffset =
19195 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
19196 State.addLoc(
19197 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19198 } else {
19199 // Can't pass scalable vectors on the stack.
19200 return true;
19201 }
19202 }
19203
19204 return false;
19205 }
19206
19207 return true; // CC didn't match.
19208}
19209
19210bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
19211 CCValAssign::LocInfo LocInfo,
19212 ISD::ArgFlagsTy ArgFlags, CCState &State) {
19213 if (ArgFlags.isNest()) {
19214 report_fatal_error(
19215 "Attribute 'nest' is not supported in GHC calling convention");
19216 }
19217
19218 static const MCPhysReg GPRList[] = {
19219 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
19220 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
19221
19222 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19223 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
19224 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
19225 if (unsigned Reg = State.AllocateReg(GPRList)) {
19226 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19227 return false;
19228 }
19229 }
19230
19231 const RISCVSubtarget &Subtarget =
19232 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
19233
19234 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19235 // Pass in STG registers: F1, ..., F6
19236 // fs0 ... fs5
19237 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
19238 RISCV::F18_F, RISCV::F19_F,
19239 RISCV::F20_F, RISCV::F21_F};
19240 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19241 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19242 return false;
19243 }
19244 }
19245
19246 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19247 // Pass in STG registers: D1, ..., D6
19248 // fs6 ... fs11
19249 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19250 RISCV::F24_D, RISCV::F25_D,
19251 RISCV::F26_D, RISCV::F27_D};
19252 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19253 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19254 return false;
19255 }
19256 }
19257
19258 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19259 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19260 Subtarget.is64Bit())) {
19261 if (unsigned Reg = State.AllocateReg(GPRList)) {
19262 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19263 return false;
19264 }
19265 }
19266
19267 report_fatal_error("No registers left in GHC calling convention");
19268 return true;
19269}
19270
19271 // Transform physical registers into virtual registers.
19272 SDValue RISCVTargetLowering::LowerFormalArguments(
19273 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19274 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19275 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19276
19277 MachineFunction &MF = DAG.getMachineFunction();
19278
19279 switch (CallConv) {
19280 default:
19281 report_fatal_error("Unsupported calling convention");
19282 case CallingConv::C:
19283 case CallingConv::Fast:
19285 case CallingConv::GRAAL:
19287 break;
19288 case CallingConv::GHC:
19289 if (Subtarget.hasStdExtE())
19290 report_fatal_error("GHC calling convention is not supported on RVE!");
19291 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19292 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19293 "(Zdinx/D) instruction set extensions");
19294 }
19295
19296 const Function &Func = MF.getFunction();
19297 if (Func.hasFnAttribute("interrupt")) {
19298 if (!Func.arg_empty())
19299 report_fatal_error(
19300 "Functions with the interrupt attribute cannot have arguments!");
19301
19302 StringRef Kind =
19303 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19304
19305 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19306 report_fatal_error(
19307 "Function interrupt attribute argument not supported!");
19308 }
19309
19310 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19311 MVT XLenVT = Subtarget.getXLenVT();
19312 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19313 // Used with varargs to accumulate store chains.
19314 std::vector<SDValue> OutChains;
19315
19316 // Assign locations to all of the incoming arguments.
19317 SmallVector<CCValAssign, 16> ArgLocs;
19318 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19319
19320 if (CallConv == CallingConv::GHC)
19321 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
19322 else
19323 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19324 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19325 : RISCV::CC_RISCV);
19326
19327 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19328 CCValAssign &VA = ArgLocs[i];
19329 SDValue ArgValue;
19330 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19331 // case.
19332 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19333 assert(VA.needsCustom());
19334 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19335 } else if (VA.isRegLoc())
19336 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19337 else
19338 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19339
19340 if (VA.getLocInfo() == CCValAssign::Indirect) {
19341 // If the original argument was split and passed by reference (e.g. i128
19342 // on RV32), we need to load all parts of it here (using the same
19343 // address). Vectors may be partly split to registers and partly to the
19344 // stack, in which case the base address is partly offset and subsequent
19345 // stores are relative to that.
19346 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19347 MachinePointerInfo()));
19348 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19349 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19350 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19351 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19352 CCValAssign &PartVA = ArgLocs[i + 1];
19353 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19354 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19355 if (PartVA.getValVT().isScalableVector())
19356 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19357 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19358 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19359 MachinePointerInfo()));
19360 ++i;
19361 ++InsIdx;
19362 }
19363 continue;
19364 }
19365 InVals.push_back(ArgValue);
19366 }
19367
19368 if (any_of(ArgLocs,
19369 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19370 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19371
19372 if (IsVarArg) {
19373 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19374 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19375 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19376 MachineFrameInfo &MFI = MF.getFrameInfo();
19377 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19378 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19379
19380 // Size of the vararg save area. For now, the varargs save area is either
19381 // zero or large enough to hold a0-a7.
19382 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19383 int FI;
19384
19385 // If all registers are allocated, then all varargs must be passed on the
19386 // stack and we don't need to save any argregs.
19387 if (VarArgsSaveSize == 0) {
19388 int VaArgOffset = CCInfo.getStackSize();
19389 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19390 } else {
19391 int VaArgOffset = -VarArgsSaveSize;
19392 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19393
19394 // If saving an odd number of registers, create an extra stack slot to
19395 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19396 // offsets to even-numbered registers remain 2*XLEN-aligned.
19397 if (Idx % 2) {
19398 MFI.CreateFixedObject(
19399 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19400 VarArgsSaveSize += XLenInBytes;
19401 }
19402
19403 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19404
19405 // Copy the integer registers that may have been used for passing varargs
19406 // to the vararg save area.
19407 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19408 const Register Reg = RegInfo.createVirtualRegister(RC);
19409 RegInfo.addLiveIn(ArgRegs[I], Reg);
19410 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19411 SDValue Store = DAG.getStore(
19412 Chain, DL, ArgValue, FIN,
19413 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19414 OutChains.push_back(Store);
19415 FIN =
19416 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19417 }
19418 }
19419
19420 // Record the frame index of the first variable argument,
19421 // which is a value needed for lowering VASTART.
19422 RVFI->setVarArgsFrameIndex(FI);
19423 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19424 }
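// Worked example (assuming the ILP32 ABI and a prototype such as
// int f(const char *fmt, ...)): the single named argument occupies a0, so
// Idx == 1 and a1-a7 (7 * 4 = 28 bytes) are spilled to the save area; because
// Idx is odd an extra 4-byte slot is created below it to keep the area
// 2*XLEN-aligned, and VarArgsSaveSize is recorded as 32.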
19425
19426 // All stores are grouped in one node to allow the matching between
19427 // the size of Ins and InVals. This only happens for vararg functions.
19428 if (!OutChains.empty()) {
19429 OutChains.push_back(Chain);
19430 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19431 }
19432
19433 return Chain;
19434}
19435
19436/// isEligibleForTailCallOptimization - Check whether the call is eligible
19437/// for tail call optimization.
19438/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19439bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19440 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19441 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19442
19443 auto CalleeCC = CLI.CallConv;
19444 auto &Outs = CLI.Outs;
19445 auto &Caller = MF.getFunction();
19446 auto CallerCC = Caller.getCallingConv();
19447
19448 // Exception-handling functions need a special set of instructions to
19449 // indicate a return to the hardware. Tail-calling another function would
19450 // probably break this.
19451 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19452 // should be expanded as new function attributes are introduced.
19453 if (Caller.hasFnAttribute("interrupt"))
19454 return false;
19455
19456 // Do not tail call opt if the stack is used to pass parameters.
19457 if (CCInfo.getStackSize() != 0)
19458 return false;
19459
19460 // Do not tail call opt if any parameters need to be passed indirectly.
19461 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19462 // passed indirectly, so the address of the value is passed in a register
19463 // or, if no register is available, on the stack. Passing indirectly often
19464 // also requires allocating stack space to hold the value itself, so the
19465 // CCInfo.getStackSize() != 0 check above is not enough on its own and we
19466 // also need to check whether any CCValAssign in ArgLocs is
19467 // CCValAssign::Indirect.
19468 for (auto &VA : ArgLocs)
19469 if (VA.getLocInfo() == CCValAssign::Indirect)
19470 return false;
19471
19472 // Do not tail call opt if either caller or callee uses struct return
19473 // semantics.
19474 auto IsCallerStructRet = Caller.hasStructRetAttr();
19475 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19476 if (IsCallerStructRet || IsCalleeStructRet)
19477 return false;
19478
19479 // The callee has to preserve all registers the caller needs to preserve.
19480 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19481 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19482 if (CalleeCC != CallerCC) {
19483 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19484 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19485 return false;
19486 }
19487
19488 // Byval parameters hand the function a pointer directly into the stack area
19489 // we want to reuse during a tail call. Working around this *is* possible
19490 // but less efficient and uglier in LowerCall.
19491 for (auto &Arg : Outs)
19492 if (Arg.Flags.isByVal())
19493 return false;
19494
19495 return true;
19496}
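// Illustrative consequences of the checks above (hypothetical calls, not
// taken from a test): a call passing an i128 on RV32 is rejected because the
// value is lowered as CCValAssign::Indirect, and a call whose arguments
// overflow onto the stack is rejected by the CCInfo.getStackSize() check;
// both therefore use the normal call sequence instead of a tail call.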
19497
19498 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19499 return DAG.getDataLayout().getPrefTypeAlign(
19500 VT.getTypeForEVT(*DAG.getContext()));
19501}
19502
19503// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19504 // and output parameter nodes.
19505 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19506 SmallVectorImpl<SDValue> &InVals) const {
19507 SelectionDAG &DAG = CLI.DAG;
19508 SDLoc &DL = CLI.DL;
19509 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19510 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19511 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19512 SDValue Chain = CLI.Chain;
19513 SDValue Callee = CLI.Callee;
19514 bool &IsTailCall = CLI.IsTailCall;
19515 CallingConv::ID CallConv = CLI.CallConv;
19516 bool IsVarArg = CLI.IsVarArg;
19517 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19518 MVT XLenVT = Subtarget.getXLenVT();
19519
19520 MachineFunction &MF = DAG.getMachineFunction();
19521
19522 // Analyze the operands of the call, assigning locations to each operand.
19523 SmallVector<CCValAssign, 16> ArgLocs;
19524 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19525
19526 if (CallConv == CallingConv::GHC) {
19527 if (Subtarget.hasStdExtE())
19528 report_fatal_error("GHC calling convention is not supported on RVE!");
19529 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
19530 } else
19531 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19532 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19533 : RISCV::CC_RISCV);
19534
19535 // Check if it's really possible to do a tail call.
19536 if (IsTailCall)
19537 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19538
19539 if (IsTailCall)
19540 ++NumTailCalls;
19541 else if (CLI.CB && CLI.CB->isMustTailCall())
19542 report_fatal_error("failed to perform tail call elimination on a call "
19543 "site marked musttail");
19544
19545 // Get a count of how many bytes are to be pushed on the stack.
19546 unsigned NumBytes = ArgCCInfo.getStackSize();
19547
19548 // Create local copies for byval args
19549 SmallVector<SDValue, 8> ByValArgs;
19550 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19551 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19552 if (!Flags.isByVal())
19553 continue;
19554
19555 SDValue Arg = OutVals[i];
19556 unsigned Size = Flags.getByValSize();
19557 Align Alignment = Flags.getNonZeroByValAlign();
19558
19559 int FI =
19560 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19561 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19562 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19563
19564 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19565 /*IsVolatile=*/false,
19566 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
19567 MachinePointerInfo(), MachinePointerInfo());
19568 ByValArgs.push_back(FIPtr);
19569 }
19570
19571 if (!IsTailCall)
19572 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19573
19574 // Copy argument values to their designated locations.
19575 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19576 SmallVector<SDValue, 8> MemOpChains;
19577 SDValue StackPtr;
19578 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19579 ++i, ++OutIdx) {
19580 CCValAssign &VA = ArgLocs[i];
19581 SDValue ArgValue = OutVals[OutIdx];
19582 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19583
19584 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19585 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19586 assert(VA.isRegLoc() && "Expected register VA assignment");
19587 assert(VA.needsCustom());
19588 SDValue SplitF64 = DAG.getNode(
19589 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19590 SDValue Lo = SplitF64.getValue(0);
19591 SDValue Hi = SplitF64.getValue(1);
19592
19593 Register RegLo = VA.getLocReg();
19594 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19595
19596 // Get the CCValAssign for the Hi part.
19597 CCValAssign &HiVA = ArgLocs[++i];
19598
19599 if (HiVA.isMemLoc()) {
19600 // Second half of f64 is passed on the stack.
19601 if (!StackPtr.getNode())
19602 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19603 SDValue Address =
19604 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19605 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19606 // Emit the store.
19607 MemOpChains.push_back(
19608 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19609 } else {
19610 // Second half of f64 is passed in another GPR.
19611 Register RegHigh = HiVA.getLocReg();
19612 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19613 }
19614 continue;
19615 }
19616
19617 // Promote the value if needed.
19618 // For now, only handle fully promoted and indirect arguments.
19619 if (VA.getLocInfo() == CCValAssign::Indirect) {
19620 // Store the argument in a stack slot and pass its address.
19621 Align StackAlign =
19622 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19623 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19624 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19625 // If the original argument was split (e.g. i128), we need
19626 // to store the required parts of it here (and pass just one address).
19627 // Vectors may be partly split to registers and partly to the stack, in
19628 // which case the base address is partly offset and subsequent stores are
19629 // relative to that.
19630 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19631 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19632 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19633 // Calculate the total size to store. We don't have access to what we're
19634 // actually storing other than performing the loop and collecting the
19635 // info.
19636 SmallVector<std::pair<SDValue, SDValue>> Parts;
19637 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19638 SDValue PartValue = OutVals[OutIdx + 1];
19639 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19640 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19641 EVT PartVT = PartValue.getValueType();
19642 if (PartVT.isScalableVector())
19643 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19644 StoredSize += PartVT.getStoreSize();
19645 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19646 Parts.push_back(std::make_pair(PartValue, Offset));
19647 ++i;
19648 ++OutIdx;
19649 }
19650 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19651 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19652 MemOpChains.push_back(
19653 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19654 MachinePointerInfo::getFixedStack(MF, FI)));
19655 for (const auto &Part : Parts) {
19656 SDValue PartValue = Part.first;
19657 SDValue PartOffset = Part.second;
19658 SDValue Address =
19659 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19660 MemOpChains.push_back(
19661 DAG.getStore(Chain, DL, PartValue, Address,
19662 MachinePointerInfo::getFixedStack(MF, FI)));
19663 }
19664 ArgValue = SpillSlot;
19665 } else {
19666 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19667 }
19668
19669 // Use local copy if it is a byval arg.
19670 if (Flags.isByVal())
19671 ArgValue = ByValArgs[j++];
19672
19673 if (VA.isRegLoc()) {
19674 // Queue up the argument copies and emit them at the end.
19675 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19676 } else {
19677 assert(VA.isMemLoc() && "Argument not register or memory");
19678 assert(!IsTailCall && "Tail call not allowed if stack is used "
19679 "for passing parameters");
19680
19681 // Work out the address of the stack slot.
19682 if (!StackPtr.getNode())
19683 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19684 SDValue Address =
19685 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19686 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
19687
19688 // Emit the store.
19689 MemOpChains.push_back(
19690 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19691 }
19692 }
19693
19694 // Join the stores, which are independent of one another.
19695 if (!MemOpChains.empty())
19696 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19697
19698 SDValue Glue;
19699
19700 // Build a sequence of copy-to-reg nodes, chained and glued together.
19701 for (auto &Reg : RegsToPass) {
19702 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19703 Glue = Chain.getValue(1);
19704 }
19705
19706 // Validate that none of the argument registers have been marked as
19707 // reserved; if so, report an error. Do the same for the return address if
19708 // this is not a tail call.
19709 validateCCReservedRegs(RegsToPass, MF);
19710 if (!IsTailCall &&
19713 MF.getFunction(),
19714 "Return address register required, but has been reserved."});
19715
19716 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19717 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19718 // split it and then direct call can be matched by PseudoCALL.
19719 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19720 const GlobalValue *GV = S->getGlobal();
19721 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19722 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19723 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19724 }
19725
19726 // The first call operand is the chain and the second is the target address.
19727 SmallVector<SDValue, 8> Ops;
19728 Ops.push_back(Chain);
19729 Ops.push_back(Callee);
19730
19731 // Add argument registers to the end of the list so that they are
19732 // known live into the call.
19733 for (auto &Reg : RegsToPass)
19734 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19735
19736 if (!IsTailCall) {
19737 // Add a register mask operand representing the call-preserved registers.
19738 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19739 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19740 assert(Mask && "Missing call preserved mask for calling convention");
19741 Ops.push_back(DAG.getRegisterMask(Mask));
19742 }
19743
19744 // Glue the call to the argument copies, if any.
19745 if (Glue.getNode())
19746 Ops.push_back(Glue);
19747
19748 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19749 "Unexpected CFI type for a direct call");
19750
19751 // Emit the call.
19752 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19753
19754 if (IsTailCall) {
19755 MF.getFrameInfo().setHasTailCall();
19756 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19757 if (CLI.CFIType)
19758 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19759 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19760 return Ret;
19761 }
19762
19763 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19764 if (CLI.CFIType)
19765 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19766 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19767 Glue = Chain.getValue(1);
19768
19769 // Mark the end of the call, which is glued to the call itself.
19770 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19771 Glue = Chain.getValue(1);
19772
19773 // Assign locations to each value returned by this call.
19774 SmallVector<CCValAssign, 16> RVLocs;
19775 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19776 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19777
19778 // Copy all of the result registers out of their specified physreg.
19779 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19780 auto &VA = RVLocs[i];
19781 // Copy the value out
19782 SDValue RetValue =
19783 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19784 // Glue the RetValue to the end of the call sequence
19785 Chain = RetValue.getValue(1);
19786 Glue = RetValue.getValue(2);
19787
19788 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19789 assert(VA.needsCustom());
19790 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19791 MVT::i32, Glue);
19792 Chain = RetValue2.getValue(1);
19793 Glue = RetValue2.getValue(2);
19794 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19795 RetValue2);
19796 }
19797
19798 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19799
19800 InVals.push_back(RetValue);
19801 }
19802
19803 return Chain;
19804}
19805
19806 bool RISCVTargetLowering::CanLowerReturn(
19807 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19808 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19809 SmallVector<CCValAssign, 16> RVLocs;
19810 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19811
19812 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19813
19814 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19815 MVT VT = Outs[i].VT;
19816 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19817 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19818 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19819 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19820 nullptr, *this, Dispatcher))
19821 return false;
19822 }
19823 return true;
19824}
19825
19826 SDValue
19827 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19828 bool IsVarArg,
19829 const SmallVectorImpl<ISD::OutputArg> &Outs,
19830 const SmallVectorImpl<SDValue> &OutVals,
19831 const SDLoc &DL, SelectionDAG &DAG) const {
19832 MachineFunction &MF = DAG.getMachineFunction();
19833 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19834
19835 // Stores the assignment of the return value to a location.
19836 SmallVector<CCValAssign, 16> RVLocs;
19837
19838 // Info about the registers and stack slot.
19839 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19840 *DAG.getContext());
19841
19842 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19843 nullptr, RISCV::CC_RISCV);
19844
19845 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19846 report_fatal_error("GHC functions return void only");
19847
19848 SDValue Glue;
19849 SmallVector<SDValue, 4> RetOps(1, Chain);
19850
19851 // Copy the result values into the output registers.
19852 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19853 SDValue Val = OutVals[OutIdx];
19854 CCValAssign &VA = RVLocs[i];
19855 assert(VA.isRegLoc() && "Can only return in registers!");
19856
19857 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19858 // Handle returning f64 on RV32D with a soft float ABI.
19859 assert(VA.isRegLoc() && "Expected return via registers");
19860 assert(VA.needsCustom());
19861 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19862 DAG.getVTList(MVT::i32, MVT::i32), Val);
19863 SDValue Lo = SplitF64.getValue(0);
19864 SDValue Hi = SplitF64.getValue(1);
19865 Register RegLo = VA.getLocReg();
19866 Register RegHi = RVLocs[++i].getLocReg();
19867
19868 if (STI.isRegisterReservedByUser(RegLo) ||
19869 STI.isRegisterReservedByUser(RegHi))
19870 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19871 MF.getFunction(),
19872 "Return value register required, but has been reserved."});
19873
19874 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19875 Glue = Chain.getValue(1);
19876 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19877 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19878 Glue = Chain.getValue(1);
19879 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19880 } else {
19881 // Handle a 'normal' return.
19882 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19883 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19884
19885 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19886 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19887 MF.getFunction(),
19888 "Return value register required, but has been reserved."});
19889
19890 // Guarantee that all emitted copies are stuck together.
19891 Glue = Chain.getValue(1);
19892 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19893 }
19894 }
19895
19896 RetOps[0] = Chain; // Update chain.
19897
19898 // Add the glue node if we have it.
19899 if (Glue.getNode()) {
19900 RetOps.push_back(Glue);
19901 }
19902
19903 if (any_of(RVLocs,
19904 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19905 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19906
19907 unsigned RetOpc = RISCVISD::RET_GLUE;
19908 // Interrupt service routines use different return instructions.
19909 const Function &Func = DAG.getMachineFunction().getFunction();
19910 if (Func.hasFnAttribute("interrupt")) {
19911 if (!Func.getReturnType()->isVoidTy())
19912 report_fatal_error(
19913 "Functions with the interrupt attribute must have void return type!");
19914
19916 StringRef Kind =
19917 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19918
19919 if (Kind == "supervisor")
19920 RetOpc = RISCVISD::SRET_GLUE;
19921 else
19922 RetOpc = RISCVISD::MRET_GLUE;
19923 }
19924
19925 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19926}
19927
19928void RISCVTargetLowering::validateCCReservedRegs(
19929 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19930 MachineFunction &MF) const {
19931 const Function &F = MF.getFunction();
19932 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19933
19934 if (llvm::any_of(Regs, [&STI](auto Reg) {
19935 return STI.isRegisterReservedByUser(Reg.first);
19936 }))
19937 F.getContext().diagnose(DiagnosticInfoUnsupported{
19938 F, "Argument register required, but has been reserved."});
19939}
19940
19941// Check if the result of the node is only used as a return value, as
19942 // otherwise we can't perform a tail-call.
19943 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19944 if (N->getNumValues() != 1)
19945 return false;
19946 if (!N->hasNUsesOfValue(1, 0))
19947 return false;
19948
19949 SDNode *Copy = *N->use_begin();
19950
19951 if (Copy->getOpcode() == ISD::BITCAST) {
19952 return isUsedByReturnOnly(Copy, Chain);
19953 }
19954
19955 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19956 // with soft float ABIs.
19957 if (Copy->getOpcode() != ISD::CopyToReg) {
19958 return false;
19959 }
19960
19961 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19962 // isn't safe to perform a tail call.
19963 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19964 return false;
19965
19966 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19967 bool HasRet = false;
19968 for (SDNode *Node : Copy->uses()) {
19969 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19970 return false;
19971 HasRet = true;
19972 }
19973 if (!HasRet)
19974 return false;
19975
19976 Chain = Copy->getOperand(0);
19977 return true;
19978}
19979
19980 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19981 return CI->isTailCall();
19982}
19983
19984const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19985#define NODE_NAME_CASE(NODE) \
19986 case RISCVISD::NODE: \
19987 return "RISCVISD::" #NODE;
19988 // clang-format off
19989 switch ((RISCVISD::NodeType)Opcode) {
19990 case RISCVISD::FIRST_NUMBER:
19991 break;
19992 NODE_NAME_CASE(RET_GLUE)
19993 NODE_NAME_CASE(SRET_GLUE)
19994 NODE_NAME_CASE(MRET_GLUE)
19995 NODE_NAME_CASE(CALL)
19996 NODE_NAME_CASE(SELECT_CC)
19997 NODE_NAME_CASE(BR_CC)
19998 NODE_NAME_CASE(BuildPairF64)
19999 NODE_NAME_CASE(SplitF64)
20000 NODE_NAME_CASE(TAIL)
20001 NODE_NAME_CASE(ADD_LO)
20002 NODE_NAME_CASE(HI)
20003 NODE_NAME_CASE(LLA)
20004 NODE_NAME_CASE(ADD_TPREL)
20005 NODE_NAME_CASE(MULHSU)
20006 NODE_NAME_CASE(SHL_ADD)
20007 NODE_NAME_CASE(SLLW)
20008 NODE_NAME_CASE(SRAW)
20009 NODE_NAME_CASE(SRLW)
20010 NODE_NAME_CASE(DIVW)
20011 NODE_NAME_CASE(DIVUW)
20012 NODE_NAME_CASE(REMUW)
20013 NODE_NAME_CASE(ROLW)
20014 NODE_NAME_CASE(RORW)
20015 NODE_NAME_CASE(CLZW)
20016 NODE_NAME_CASE(CTZW)
20017 NODE_NAME_CASE(ABSW)
20018 NODE_NAME_CASE(FMV_H_X)
20019 NODE_NAME_CASE(FMV_X_ANYEXTH)
20020 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20021 NODE_NAME_CASE(FMV_W_X_RV64)
20022 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20023 NODE_NAME_CASE(FCVT_X)
20024 NODE_NAME_CASE(FCVT_XU)
20025 NODE_NAME_CASE(FCVT_W_RV64)
20026 NODE_NAME_CASE(FCVT_WU_RV64)
20027 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20028 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20029 NODE_NAME_CASE(FP_ROUND_BF16)
20030 NODE_NAME_CASE(FP_EXTEND_BF16)
20031 NODE_NAME_CASE(FROUND)
20032 NODE_NAME_CASE(FCLASS)
20033 NODE_NAME_CASE(FSGNJX)
20034 NODE_NAME_CASE(FMAX)
20035 NODE_NAME_CASE(FMIN)
20036 NODE_NAME_CASE(READ_COUNTER_WIDE)
20037 NODE_NAME_CASE(BREV8)
20038 NODE_NAME_CASE(ORC_B)
20039 NODE_NAME_CASE(ZIP)
20040 NODE_NAME_CASE(UNZIP)
20041 NODE_NAME_CASE(CLMUL)
20042 NODE_NAME_CASE(CLMULH)
20043 NODE_NAME_CASE(CLMULR)
20044 NODE_NAME_CASE(MOPR)
20045 NODE_NAME_CASE(MOPRR)
20046 NODE_NAME_CASE(SHA256SIG0)
20047 NODE_NAME_CASE(SHA256SIG1)
20048 NODE_NAME_CASE(SHA256SUM0)
20049 NODE_NAME_CASE(SHA256SUM1)
20050 NODE_NAME_CASE(SM4KS)
20051 NODE_NAME_CASE(SM4ED)
20052 NODE_NAME_CASE(SM3P0)
20053 NODE_NAME_CASE(SM3P1)
20054 NODE_NAME_CASE(TH_LWD)
20055 NODE_NAME_CASE(TH_LWUD)
20056 NODE_NAME_CASE(TH_LDD)
20057 NODE_NAME_CASE(TH_SWD)
20058 NODE_NAME_CASE(TH_SDD)
20059 NODE_NAME_CASE(VMV_V_V_VL)
20060 NODE_NAME_CASE(VMV_V_X_VL)
20061 NODE_NAME_CASE(VFMV_V_F_VL)
20062 NODE_NAME_CASE(VMV_X_S)
20063 NODE_NAME_CASE(VMV_S_X_VL)
20064 NODE_NAME_CASE(VFMV_S_F_VL)
20065 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20066 NODE_NAME_CASE(READ_VLENB)
20067 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20068 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20069 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20070 NODE_NAME_CASE(VSLIDEUP_VL)
20071 NODE_NAME_CASE(VSLIDE1UP_VL)
20072 NODE_NAME_CASE(VSLIDEDOWN_VL)
20073 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20074 NODE_NAME_CASE(VFSLIDE1UP_VL)
20075 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20076 NODE_NAME_CASE(VID_VL)
20077 NODE_NAME_CASE(VFNCVT_ROD_VL)
20078 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20079 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20080 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20081 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20082 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20083 NODE_NAME_CASE(VECREDUCE_AND_VL)
20084 NODE_NAME_CASE(VECREDUCE_OR_VL)
20085 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20086 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20087 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20088 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20089 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20090 NODE_NAME_CASE(ADD_VL)
20091 NODE_NAME_CASE(AND_VL)
20092 NODE_NAME_CASE(MUL_VL)
20093 NODE_NAME_CASE(OR_VL)
20094 NODE_NAME_CASE(SDIV_VL)
20095 NODE_NAME_CASE(SHL_VL)
20096 NODE_NAME_CASE(SREM_VL)
20097 NODE_NAME_CASE(SRA_VL)
20098 NODE_NAME_CASE(SRL_VL)
20099 NODE_NAME_CASE(ROTL_VL)
20100 NODE_NAME_CASE(ROTR_VL)
20101 NODE_NAME_CASE(SUB_VL)
20102 NODE_NAME_CASE(UDIV_VL)
20103 NODE_NAME_CASE(UREM_VL)
20104 NODE_NAME_CASE(XOR_VL)
20105 NODE_NAME_CASE(AVGFLOORS_VL)
20106 NODE_NAME_CASE(AVGFLOORU_VL)
20107 NODE_NAME_CASE(AVGCEILS_VL)
20108 NODE_NAME_CASE(AVGCEILU_VL)
20109 NODE_NAME_CASE(SADDSAT_VL)
20110 NODE_NAME_CASE(UADDSAT_VL)
20111 NODE_NAME_CASE(SSUBSAT_VL)
20112 NODE_NAME_CASE(USUBSAT_VL)
20113 NODE_NAME_CASE(FADD_VL)
20114 NODE_NAME_CASE(FSUB_VL)
20115 NODE_NAME_CASE(FMUL_VL)
20116 NODE_NAME_CASE(FDIV_VL)
20117 NODE_NAME_CASE(FNEG_VL)
20118 NODE_NAME_CASE(FABS_VL)
20119 NODE_NAME_CASE(FSQRT_VL)
20120 NODE_NAME_CASE(FCLASS_VL)
20121 NODE_NAME_CASE(VFMADD_VL)
20122 NODE_NAME_CASE(VFNMADD_VL)
20123 NODE_NAME_CASE(VFMSUB_VL)
20124 NODE_NAME_CASE(VFNMSUB_VL)
20125 NODE_NAME_CASE(VFWMADD_VL)
20126 NODE_NAME_CASE(VFWNMADD_VL)
20127 NODE_NAME_CASE(VFWMSUB_VL)
20128 NODE_NAME_CASE(VFWNMSUB_VL)
20129 NODE_NAME_CASE(FCOPYSIGN_VL)
20130 NODE_NAME_CASE(SMIN_VL)
20131 NODE_NAME_CASE(SMAX_VL)
20132 NODE_NAME_CASE(UMIN_VL)
20133 NODE_NAME_CASE(UMAX_VL)
20134 NODE_NAME_CASE(BITREVERSE_VL)
20135 NODE_NAME_CASE(BSWAP_VL)
20136 NODE_NAME_CASE(CTLZ_VL)
20137 NODE_NAME_CASE(CTTZ_VL)
20138 NODE_NAME_CASE(CTPOP_VL)
20139 NODE_NAME_CASE(VFMIN_VL)
20140 NODE_NAME_CASE(VFMAX_VL)
20141 NODE_NAME_CASE(MULHS_VL)
20142 NODE_NAME_CASE(MULHU_VL)
20143 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20144 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20145 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20146 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20147 NODE_NAME_CASE(VFCVT_X_F_VL)
20148 NODE_NAME_CASE(VFCVT_XU_F_VL)
20149 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20150 NODE_NAME_CASE(SINT_TO_FP_VL)
20151 NODE_NAME_CASE(UINT_TO_FP_VL)
20152 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20153 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20154 NODE_NAME_CASE(FP_EXTEND_VL)
20155 NODE_NAME_CASE(FP_ROUND_VL)
20156 NODE_NAME_CASE(STRICT_FADD_VL)
20157 NODE_NAME_CASE(STRICT_FSUB_VL)
20158 NODE_NAME_CASE(STRICT_FMUL_VL)
20159 NODE_NAME_CASE(STRICT_FDIV_VL)
20160 NODE_NAME_CASE(STRICT_FSQRT_VL)
20161 NODE_NAME_CASE(STRICT_VFMADD_VL)
20162 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20163 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20164 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20165 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20166 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20167 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20168 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20169 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20170 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20171 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20172 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20173 NODE_NAME_CASE(STRICT_FSETCC_VL)
20174 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20175 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20176 NODE_NAME_CASE(VWMUL_VL)
20177 NODE_NAME_CASE(VWMULU_VL)
20178 NODE_NAME_CASE(VWMULSU_VL)
20179 NODE_NAME_CASE(VWADD_VL)
20180 NODE_NAME_CASE(VWADDU_VL)
20181 NODE_NAME_CASE(VWSUB_VL)
20182 NODE_NAME_CASE(VWSUBU_VL)
20183 NODE_NAME_CASE(VWADD_W_VL)
20184 NODE_NAME_CASE(VWADDU_W_VL)
20185 NODE_NAME_CASE(VWSUB_W_VL)
20186 NODE_NAME_CASE(VWSUBU_W_VL)
20187 NODE_NAME_CASE(VWSLL_VL)
20188 NODE_NAME_CASE(VFWMUL_VL)
20189 NODE_NAME_CASE(VFWADD_VL)
20190 NODE_NAME_CASE(VFWSUB_VL)
20191 NODE_NAME_CASE(VFWADD_W_VL)
20192 NODE_NAME_CASE(VFWSUB_W_VL)
20193 NODE_NAME_CASE(VWMACC_VL)
20194 NODE_NAME_CASE(VWMACCU_VL)
20195 NODE_NAME_CASE(VWMACCSU_VL)
20196 NODE_NAME_CASE(VNSRL_VL)
20197 NODE_NAME_CASE(SETCC_VL)
20198 NODE_NAME_CASE(VMERGE_VL)
20199 NODE_NAME_CASE(VMAND_VL)
20200 NODE_NAME_CASE(VMOR_VL)
20201 NODE_NAME_CASE(VMXOR_VL)
20202 NODE_NAME_CASE(VMCLR_VL)
20203 NODE_NAME_CASE(VMSET_VL)
20204 NODE_NAME_CASE(VRGATHER_VX_VL)
20205 NODE_NAME_CASE(VRGATHER_VV_VL)
20206 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20207 NODE_NAME_CASE(VSEXT_VL)
20208 NODE_NAME_CASE(VZEXT_VL)
20209 NODE_NAME_CASE(VCPOP_VL)
20210 NODE_NAME_CASE(VFIRST_VL)
20211 NODE_NAME_CASE(READ_CSR)
20212 NODE_NAME_CASE(WRITE_CSR)
20213 NODE_NAME_CASE(SWAP_CSR)
20214 NODE_NAME_CASE(CZERO_EQZ)
20215 NODE_NAME_CASE(CZERO_NEZ)
20216 NODE_NAME_CASE(SW_GUARDED_BRIND)
20217 NODE_NAME_CASE(SF_VC_XV_SE)
20218 NODE_NAME_CASE(SF_VC_IV_SE)
20219 NODE_NAME_CASE(SF_VC_VV_SE)
20220 NODE_NAME_CASE(SF_VC_FV_SE)
20221 NODE_NAME_CASE(SF_VC_XVV_SE)
20222 NODE_NAME_CASE(SF_VC_IVV_SE)
20223 NODE_NAME_CASE(SF_VC_VVV_SE)
20224 NODE_NAME_CASE(SF_VC_FVV_SE)
20225 NODE_NAME_CASE(SF_VC_XVW_SE)
20226 NODE_NAME_CASE(SF_VC_IVW_SE)
20227 NODE_NAME_CASE(SF_VC_VVW_SE)
20228 NODE_NAME_CASE(SF_VC_FVW_SE)
20229 NODE_NAME_CASE(SF_VC_V_X_SE)
20230 NODE_NAME_CASE(SF_VC_V_I_SE)
20231 NODE_NAME_CASE(SF_VC_V_XV_SE)
20232 NODE_NAME_CASE(SF_VC_V_IV_SE)
20233 NODE_NAME_CASE(SF_VC_V_VV_SE)
20234 NODE_NAME_CASE(SF_VC_V_FV_SE)
20235 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20236 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20237 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20238 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20239 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20240 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20241 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20242 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20243 }
20244 // clang-format on
20245 return nullptr;
20246#undef NODE_NAME_CASE
20247}
20248
20249/// getConstraintType - Given a constraint letter, return the type of
20250 /// constraint it is for this target.
20251 RISCVTargetLowering::ConstraintType
20252 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20253 if (Constraint.size() == 1) {
20254 switch (Constraint[0]) {
20255 default:
20256 break;
20257 case 'f':
20258 return C_RegisterClass;
20259 case 'I':
20260 case 'J':
20261 case 'K':
20262 return C_Immediate;
20263 case 'A':
20264 return C_Memory;
20265 case 's':
20266 case 'S': // A symbolic address
20267 return C_Other;
20268 }
20269 } else {
20270 if (Constraint == "vr" || Constraint == "vm")
20271 return C_RegisterClass;
20272 }
20273 return TargetLowering::getConstraintType(Constraint);
20274}
20275
20276 std::pair<unsigned, const TargetRegisterClass *>
20277 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20278 StringRef Constraint,
20279 MVT VT) const {
20280 // First, see if this is a constraint that directly corresponds to a RISC-V
20281 // register class.
20282 if (Constraint.size() == 1) {
20283 switch (Constraint[0]) {
20284 case 'r':
20285 // TODO: Support fixed vectors up to XLen for P extension?
20286 if (VT.isVector())
20287 break;
20288 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20289 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20290 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20291 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20292 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20293 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20294 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20295 case 'f':
20296 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20297 return std::make_pair(0U, &RISCV::FPR16RegClass);
20298 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20299 return std::make_pair(0U, &RISCV::FPR32RegClass);
20300 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20301 return std::make_pair(0U, &RISCV::FPR64RegClass);
20302 break;
20303 default:
20304 break;
20305 }
20306 } else if (Constraint == "vr") {
20307 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20308 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20309 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20310 return std::make_pair(0U, RC);
20311 }
20312 } else if (Constraint == "vm") {
20313 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20314 return std::make_pair(0U, &RISCV::VMV0RegClass);
20315 }
20316
20317 // Clang will correctly decode the usage of register name aliases into their
20318 // official names. However, other frontends like `rustc` do not. This allows
20319 // users of these frontends to use the ABI names for registers in LLVM-style
20320 // register constraints.
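// Editorial note (illustrative, not part of the original source): with this
// mapping, an inline-asm constraint string such as "={a0},{a1}" resolves to
// X10 and X11 below, even though "a0"/"a1" are ABI aliases rather than the
// TableGen record names.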
20321 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20322 .Case("{zero}", RISCV::X0)
20323 .Case("{ra}", RISCV::X1)
20324 .Case("{sp}", RISCV::X2)
20325 .Case("{gp}", RISCV::X3)
20326 .Case("{tp}", RISCV::X4)
20327 .Case("{t0}", RISCV::X5)
20328 .Case("{t1}", RISCV::X6)
20329 .Case("{t2}", RISCV::X7)
20330 .Cases("{s0}", "{fp}", RISCV::X8)
20331 .Case("{s1}", RISCV::X9)
20332 .Case("{a0}", RISCV::X10)
20333 .Case("{a1}", RISCV::X11)
20334 .Case("{a2}", RISCV::X12)
20335 .Case("{a3}", RISCV::X13)
20336 .Case("{a4}", RISCV::X14)
20337 .Case("{a5}", RISCV::X15)
20338 .Case("{a6}", RISCV::X16)
20339 .Case("{a7}", RISCV::X17)
20340 .Case("{s2}", RISCV::X18)
20341 .Case("{s3}", RISCV::X19)
20342 .Case("{s4}", RISCV::X20)
20343 .Case("{s5}", RISCV::X21)
20344 .Case("{s6}", RISCV::X22)
20345 .Case("{s7}", RISCV::X23)
20346 .Case("{s8}", RISCV::X24)
20347 .Case("{s9}", RISCV::X25)
20348 .Case("{s10}", RISCV::X26)
20349 .Case("{s11}", RISCV::X27)
20350 .Case("{t3}", RISCV::X28)
20351 .Case("{t4}", RISCV::X29)
20352 .Case("{t5}", RISCV::X30)
20353 .Case("{t6}", RISCV::X31)
20354 .Default(RISCV::NoRegister);
20355 if (XRegFromAlias != RISCV::NoRegister)
20356 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20357
20358 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20359 // TableGen record rather than the AsmName to choose registers for InlineAsm
20360 // constraints, plus we want to match those names to the widest floating point
20361 // register type available, manually select floating point registers here.
20362 //
20363 // The second case is the ABI name of the register, so that frontends can also
20364 // use the ABI names in register constraint lists.
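// Editorial example (illustrative, not part of the original source): both
// "{f8}" and its ABI alias "{fs0}" map to F8_F below; with the D extension
// and an f64 (or unknown) value type the result is widened to F8_D, and with
// Zfhmin and an f16 value type it is narrowed to F8_H.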
20365 if (Subtarget.hasStdExtF()) {
20366 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20367 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20368 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20369 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20370 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20371 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20372 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20373 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20374 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20375 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20376 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20377 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20378 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20379 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20380 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20381 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20382 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20383 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20384 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20385 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20386 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20387 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20388 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20389 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20390 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20391 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20392 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20393 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20394 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20395 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20396 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20397 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20398 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20399 .Default(RISCV::NoRegister);
20400 if (FReg != RISCV::NoRegister) {
20401 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20402 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20403 unsigned RegNo = FReg - RISCV::F0_F;
20404 unsigned DReg = RISCV::F0_D + RegNo;
20405 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20406 }
20407 if (VT == MVT::f32 || VT == MVT::Other)
20408 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20409 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20410 unsigned RegNo = FReg - RISCV::F0_F;
20411 unsigned HReg = RISCV::F0_H + RegNo;
20412 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20413 }
20414 }
20415 }
20416
20417 if (Subtarget.hasVInstructions()) {
20418 Register VReg = StringSwitch<Register>(Constraint.lower())
20419 .Case("{v0}", RISCV::V0)
20420 .Case("{v1}", RISCV::V1)
20421 .Case("{v2}", RISCV::V2)
20422 .Case("{v3}", RISCV::V3)
20423 .Case("{v4}", RISCV::V4)
20424 .Case("{v5}", RISCV::V5)
20425 .Case("{v6}", RISCV::V6)
20426 .Case("{v7}", RISCV::V7)
20427 .Case("{v8}", RISCV::V8)
20428 .Case("{v9}", RISCV::V9)
20429 .Case("{v10}", RISCV::V10)
20430 .Case("{v11}", RISCV::V11)
20431 .Case("{v12}", RISCV::V12)
20432 .Case("{v13}", RISCV::V13)
20433 .Case("{v14}", RISCV::V14)
20434 .Case("{v15}", RISCV::V15)
20435 .Case("{v16}", RISCV::V16)
20436 .Case("{v17}", RISCV::V17)
20437 .Case("{v18}", RISCV::V18)
20438 .Case("{v19}", RISCV::V19)
20439 .Case("{v20}", RISCV::V20)
20440 .Case("{v21}", RISCV::V21)
20441 .Case("{v22}", RISCV::V22)
20442 .Case("{v23}", RISCV::V23)
20443 .Case("{v24}", RISCV::V24)
20444 .Case("{v25}", RISCV::V25)
20445 .Case("{v26}", RISCV::V26)
20446 .Case("{v27}", RISCV::V27)
20447 .Case("{v28}", RISCV::V28)
20448 .Case("{v29}", RISCV::V29)
20449 .Case("{v30}", RISCV::V30)
20450 .Case("{v31}", RISCV::V31)
20451 .Default(RISCV::NoRegister);
20452 if (VReg != RISCV::NoRegister) {
20453 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20454 return std::make_pair(VReg, &RISCV::VMRegClass);
20455 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20456 return std::make_pair(VReg, &RISCV::VRRegClass);
20457 for (const auto *RC :
20458 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20459 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20460 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20461 return std::make_pair(VReg, RC);
20462 }
20463 }
20464 }
20465 }
20466
20467 std::pair<Register, const TargetRegisterClass *> Res =
20469
20470 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20471 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20472 // Subtarget into account.
20473 if (Res.second == &RISCV::GPRF16RegClass ||
20474 Res.second == &RISCV::GPRF32RegClass ||
20475 Res.second == &RISCV::GPRPairRegClass)
20476 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20477
20478 return Res;
20479}
20480
20483 // Currently only support length 1 constraints.
20484 if (ConstraintCode.size() == 1) {
20485 switch (ConstraintCode[0]) {
20486 case 'A':
20488 default:
20489 break;
20490 }
20491 }
20492
20493 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20494}
20495
20497 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20498 SelectionDAG &DAG) const {
20499 // Currently only support length 1 constraints.
20500 if (Constraint.size() == 1) {
20501 switch (Constraint[0]) {
20502 case 'I':
20503 // Validate & create a 12-bit signed immediate operand.
20504 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20505 uint64_t CVal = C->getSExtValue();
20506 if (isInt<12>(CVal))
20507 Ops.push_back(DAG.getSignedConstant(
20508 CVal, SDLoc(Op), Subtarget.getXLenVT(), /*isTarget=*/true));
20509 }
20510 return;
20511 case 'J':
20512 // Validate & create an integer zero operand.
20513 if (isNullConstant(Op))
20514 Ops.push_back(
20515 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20516 return;
20517 case 'K':
20518 // Validate & create a 5-bit unsigned immediate operand.
20519 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20520 uint64_t CVal = C->getZExtValue();
20521 if (isUInt<5>(CVal))
20522 Ops.push_back(
20523 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20524 }
20525 return;
20526 case 'S':
20528 return;
20529 default:
20530 break;
20531 }
20532 }
20533 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20534}
20535
20537 Instruction *Inst,
20538 AtomicOrdering Ord) const {
20539 if (Subtarget.hasStdExtZtso()) {
20540 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20541 return Builder.CreateFence(Ord);
20542 return nullptr;
20543 }
20544
20545 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20546 return Builder.CreateFence(Ord);
20547 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20548 return Builder.CreateFence(AtomicOrdering::Release);
20549 return nullptr;
20550}
20551
20553 Instruction *Inst,
20554 AtomicOrdering Ord) const {
20555 if (Subtarget.hasStdExtZtso()) {
20556 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20557 return Builder.CreateFence(Ord);
20558 return nullptr;
20559 }
20560
20561 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20562 return Builder.CreateFence(AtomicOrdering::Acquire);
20563 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
20566 return nullptr;
20567}
20568
20571 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20572 // point operations can't be used in an lr/sc sequence without breaking the
20573 // forward-progress guarantee.
20574 if (AI->isFloatingPointOperation() ||
20578
20579 // Don't expand forced atomics, we want to have __sync libcalls instead.
20580 if (Subtarget.hasForcedAtomics())
20582
20583 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20584 if (AI->getOperation() == AtomicRMWInst::Nand) {
20585 if (Subtarget.hasStdExtZacas() &&
20586 (Size >= 32 || Subtarget.hasStdExtZabha()))
20588 if (Size < 32)
20590 }
20591
20592 if (Size < 32 && !Subtarget.hasStdExtZabha())
20594
20596}
20597
20598static Intrinsic::ID
20600 if (XLen == 32) {
20601 switch (BinOp) {
20602 default:
20603 llvm_unreachable("Unexpected AtomicRMW BinOp");
20605 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20606 case AtomicRMWInst::Add:
20607 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20608 case AtomicRMWInst::Sub:
20609 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20611 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20612 case AtomicRMWInst::Max:
20613 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20614 case AtomicRMWInst::Min:
20615 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20617 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20619 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20620 }
20621 }
20622
20623 if (XLen == 64) {
20624 switch (BinOp) {
20625 default:
20626 llvm_unreachable("Unexpected AtomicRMW BinOp");
20628 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20629 case AtomicRMWInst::Add:
20630 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20631 case AtomicRMWInst::Sub:
20632 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20634 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20635 case AtomicRMWInst::Max:
20636 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20637 case AtomicRMWInst::Min:
20638 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20640 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20642 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20643 }
20644 }
20645
20646 llvm_unreachable("Unexpected XLen\n");
20647}
20648
20650 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20651 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20652 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20653 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20654 // mask, as this produces better code than the LR/SC loop emitted by
20655 // int_riscv_masked_atomicrmw_xchg.
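// Editorial sketch (illustrative, not part of the original source): for
//   %old = atomicrmw xchg ptr %p, i8 0 monotonic
// the masked expansion below becomes an AtomicRMWInst::And of the aligned
// word with ~Mask (clearing just the addressed byte), while an xchg with -1
// becomes an AtomicRMWInst::Or with Mask (setting it).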
20656 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20657 isa<ConstantInt>(AI->getValOperand())) {
20658 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20659 if (CVal->isZero())
20660 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20661 Builder.CreateNot(Mask, "Inv_Mask"),
20662 AI->getAlign(), Ord);
20663 if (CVal->isMinusOne())
20664 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20665 AI->getAlign(), Ord);
20666 }
20667
20668 unsigned XLen = Subtarget.getXLen();
20669 Value *Ordering =
20670 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20671 Type *Tys[] = {AlignedAddr->getType()};
20672 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20673 AI->getModule(),
20675
20676 if (XLen == 64) {
20677 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20678 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20679 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20680 }
20681
20682 Value *Result;
20683
20684 // Must pass the shift amount needed to sign extend the loaded value prior
20685 // to performing a signed comparison for min/max. ShiftAmt is the number of
20686 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20687 // is the number of bits to left+right shift the value in order to
20688 // sign-extend.
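// Editorial worked example (illustrative, not part of the original source):
// on RV64, an i8 value that the masked expansion placed at bit offset 16 has
// ShiftAmt = 16 and ValWidth = 8, so SextShamt = 64 - 8 - 16 = 40; shifting
// left and then arithmetic-right by 40 sign-extends the byte before the
// signed min/max comparison.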
20689 if (AI->getOperation() == AtomicRMWInst::Min ||
20691 const DataLayout &DL = AI->getDataLayout();
20692 unsigned ValWidth =
20693 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20694 Value *SextShamt =
20695 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20696 Result = Builder.CreateCall(LrwOpScwLoop,
20697 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20698 } else {
20699 Result =
20700 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20701 }
20702
20703 if (XLen == 64)
20704 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20705 return Result;
20706}
20707
20710 AtomicCmpXchgInst *CI) const {
20711 // Don't expand forced atomics, we want to have __sync libcalls instead.
20712 if (Subtarget.hasForcedAtomics())
20714
20716 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20717 (Size == 8 || Size == 16))
20720}
20721
20723 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20724 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20725 unsigned XLen = Subtarget.getXLen();
20726 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20727 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20728 if (XLen == 64) {
20729 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20730 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20731 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20732 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20733 }
20734 Type *Tys[] = {AlignedAddr->getType()};
20735 Function *MaskedCmpXchg =
20736 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20737 Value *Result = Builder.CreateCall(
20738 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20739 if (XLen == 64)
20740 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20741 return Result;
20742}
20743
20745 EVT DataVT) const {
20746 // We have indexed loads for all supported EEW types. Indices are always
20747 // zero extended.
20748 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20749 isTypeLegal(Extend.getValueType()) &&
20750 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20751 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20752}
20753
20755 EVT VT) const {
20756 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20757 return false;
20758
20759 switch (FPVT.getSimpleVT().SimpleTy) {
20760 case MVT::f16:
20761 return Subtarget.hasStdExtZfhmin();
20762 case MVT::f32:
20763 return Subtarget.hasStdExtF();
20764 case MVT::f64:
20765 return Subtarget.hasStdExtD();
20766 default:
20767 return false;
20768 }
20769}
20770
20772 // If we are using the small code model, we can reduce size of jump table
20773 // entry to 4 bytes.
20774 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20777 }
20779}
20780
20782 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20783 unsigned uid, MCContext &Ctx) const {
20784 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20786 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20787}
20788
20790 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20791 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20792 // a power of two as well.
20793 // FIXME: This doesn't work for zve32, but that's already broken
20794 // elsewhere for the same reason.
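// Editorial note (illustrative, not part of the original source): for
// example, with VLEN = 128 vscale is 128 / 64 = 2, and with VLEN = 512 it is
// 8; given the constraints asserted below it is always a power of two.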
20795 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20796 static_assert(RISCV::RVVBitsPerBlock == 64,
20797 "RVVBitsPerBlock changed, audit needed");
20798 return true;
20799}
20800
20802 SDValue &Offset,
20804 SelectionDAG &DAG) const {
20805 // Target does not support indexed loads.
20806 if (!Subtarget.hasVendorXTHeadMemIdx())
20807 return false;
20808
20809 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20810 return false;
20811
20812 Base = Op->getOperand(0);
20813 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20814 int64_t RHSC = RHS->getSExtValue();
20815 if (Op->getOpcode() == ISD::SUB)
20816 RHSC = -(uint64_t)RHSC;
20817
20818 // The constants that can be encoded in the THeadMemIdx instructions
20819 // are of the form (sign_extend(imm5) << imm2).
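// Editorial examples (illustrative, not part of the original source): an
// offset of 48 is encodable as 6 << 3 and -64 as -8 << 3, while 100 is not,
// since no shift amount in 0..3 leaves a signed 5-bit value with no
// remainder.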
20820 bool isLegalIndexedOffset = false;
20821 for (unsigned i = 0; i < 4; i++)
20822 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20823 isLegalIndexedOffset = true;
20824 break;
20825 }
20826
20827 if (!isLegalIndexedOffset)
20828 return false;
20829
20830 Offset = Op->getOperand(1);
20831 return true;
20832 }
20833
20834 return false;
20835}
20836
20838 SDValue &Offset,
20840 SelectionDAG &DAG) const {
20841 EVT VT;
20842 SDValue Ptr;
20843 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20844 VT = LD->getMemoryVT();
20845 Ptr = LD->getBasePtr();
20846 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20847 VT = ST->getMemoryVT();
20848 Ptr = ST->getBasePtr();
20849 } else
20850 return false;
20851
20852 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20853 return false;
20854
20855 AM = ISD::PRE_INC;
20856 return true;
20857}
20858
20860 SDValue &Base,
20861 SDValue &Offset,
20863 SelectionDAG &DAG) const {
20864 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
20865 if (Op->getOpcode() != ISD::ADD)
20866 return false;
20867
20868 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
20869 Base = LS->getBasePtr();
20870 else
20871 return false;
20872
20873 if (Base == Op->getOperand(0))
20874 Offset = Op->getOperand(1);
20875 else if (Base == Op->getOperand(1))
20876 Offset = Op->getOperand(0);
20877 else
20878 return false;
20879
20880 AM = ISD::POST_INC;
20881 return true;
20882 }
20883
20884 EVT VT;
20885 SDValue Ptr;
20886 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20887 VT = LD->getMemoryVT();
20888 Ptr = LD->getBasePtr();
20889 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20890 VT = ST->getMemoryVT();
20891 Ptr = ST->getBasePtr();
20892 } else
20893 return false;
20894
20895 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20896 return false;
20897 // Post-indexing updates the base, so it's not a valid transform
20898 // if that's not the same as the load's pointer.
20899 if (Ptr != Base)
20900 return false;
20901
20902 AM = ISD::POST_INC;
20903 return true;
20904}
20905
20907 EVT VT) const {
20908 EVT SVT = VT.getScalarType();
20909
20910 if (!SVT.isSimple())
20911 return false;
20912
20913 switch (SVT.getSimpleVT().SimpleTy) {
20914 case MVT::f16:
20915 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20916 : Subtarget.hasStdExtZfhOrZhinx();
20917 case MVT::f32:
20918 return Subtarget.hasStdExtFOrZfinx();
20919 case MVT::f64:
20920 return Subtarget.hasStdExtDOrZdinx();
20921 default:
20922 break;
20923 }
20924
20925 return false;
20926}
20927
20929 // Zacas will use amocas.w which does not require extension.
20930 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20931}
20932
20934 const Constant *PersonalityFn) const {
20935 return RISCV::X10;
20936}
20937
20939 const Constant *PersonalityFn) const {
20940 return RISCV::X11;
20941}
20942
20944 // Return false to suppress the unnecessary extensions if the LibCall
20945 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20946 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20947 Type.getSizeInBits() < Subtarget.getXLen()))
20948 return false;
20949
20950 return true;
20951}
20952
20954 if (Subtarget.is64Bit() && Type == MVT::i32)
20955 return true;
20956
20957 return IsSigned;
20958}
20959
20961 SDValue C) const {
20962 // Check integral scalar types.
20963 if (!VT.isScalarInteger())
20964 return false;
20965
20966 // Omit the optimization if the subtarget has the Zmmul extension and the data
20967 // size exceeds XLen.
20968 const bool HasZmmul = Subtarget.hasStdExtZmmul();
20969 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20970 return false;
20971
20972 auto *ConstNode = cast<ConstantSDNode>(C);
20973 const APInt &Imm = ConstNode->getAPIntValue();
20974
20975 // Break the MUL to a SLLI and an ADD/SUB.
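// Editorial example (illustrative, not part of the original source): a
// multiply by 17 decomposes as (x << 4) + x, i.e. slli t, x, 4; add x, t, x.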
20976 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20977 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20978 return true;
20979
20980 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
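// Editorial example (illustrative, not part of the original source): with
// Zba, a multiply by 2050 (= 2048 + 2, not a simm12) can become
// slli t, x, 11; sh1add x, x, t.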
20981 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20982 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20983 (Imm - 8).isPowerOf2()))
20984 return true;
20985
20986 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20987 // a pair of LUI/ADDI.
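// Editorial example (illustrative, not part of the original source): a
// multiply by 4100 (= 1025 << 2) would otherwise need lui/addi to
// materialize the constant; instead it decomposes as ((x << 10) + x) << 2.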
20988 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20989 ConstNode->hasOneUse()) {
20990 APInt ImmS = Imm.ashr(Imm.countr_zero());
20991 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20992 (1 - ImmS).isPowerOf2())
20993 return true;
20994 }
20995
20996 return false;
20997}
20998
21000 SDValue ConstNode) const {
21001 // Let the DAGCombiner decide for vectors.
21002 EVT VT = AddNode.getValueType();
21003 if (VT.isVector())
21004 return true;
21005
21006 // Let the DAGCombiner decide for larger types.
21007 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21008 return true;
21009
21010 // It is worse if c1 is simm12 while c1*c2 is not.
21011 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21012 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21013 const APInt &C1 = C1Node->getAPIntValue();
21014 const APInt &C2 = C2Node->getAPIntValue();
21015 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21016 return false;
21017
21018 // Default to true and let the DAGCombiner decide.
21019 return true;
21020}
21021
21023 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21024 unsigned *Fast) const {
21025 if (!VT.isVector()) {
21026 if (Fast)
21027 *Fast = Subtarget.enableUnalignedScalarMem();
21028 return Subtarget.enableUnalignedScalarMem();
21029 }
21030
21031 // All vector implementations must support element alignment
21032 EVT ElemVT = VT.getVectorElementType();
21033 if (Alignment >= ElemVT.getStoreSize()) {
21034 if (Fast)
21035 *Fast = 1;
21036 return true;
21037 }
21038
21039 // Note: We lower an unmasked unaligned vector access to an equally sized
21040 // e8 element type access. Given this, we effectively support all unmasked
21041 // misaligned accesses. TODO: Work through the codegen implications of
21042 // allowing such accesses to be formed, and considered fast.
21043 if (Fast)
21044 *Fast = Subtarget.enableUnalignedVectorMem();
21045 return Subtarget.enableUnalignedVectorMem();
21046}
21047
21048
21050 const AttributeList &FuncAttributes) const {
21051 if (!Subtarget.hasVInstructions())
21052 return MVT::Other;
21053
21054 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21055 return MVT::Other;
21056
21057 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21058 // has an expansion threshold, and we want the number of hardware memory
21059 // operations to correspond roughly to that threshold. LMUL>1 operations
21060 // are typically expanded linearly internally, and thus correspond to more
21061 // than one actual memory operation. Note that store merging and load
21062 // combining will typically form larger LMUL operations from the LMUL1
21063 // operations emitted here, and that's okay because combining isn't
21064 // introducing new memory operations; it's just merging existing ones.
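// Editorial note (illustrative, not part of the original source): e.g. with
// Zvl128b (RealMinVLen = 128, so MinVLenInBytes = 16) and ELEN = 64, a
// 32-byte memcpy with sufficiently aligned operands would return MVT::v2i64
// below, and a 32-byte non-zero memset would return MVT::v16i8.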
21065 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21066 if (Op.size() < MinVLenInBytes)
21067 // TODO: Figure out short memops. For the moment, do the default thing
21068 // which ends up using scalar sequences.
21069 return MVT::Other;
21070
21071 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
21072 // fixed vectors.
21073 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
21074 return MVT::Other;
21075
21076 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21077 // a large scalar constant and instead use vmv.v.x/i to do the
21078 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21079 // maximize the chance we can encode the size in the vsetvli.
21080 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21081 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21082
21083 // Do we have sufficient alignment for our preferred VT? If not, revert
21084 // to largest size allowed by our alignment criteria.
21085 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21086 Align RequiredAlign(PreferredVT.getStoreSize());
21087 if (Op.isFixedDstAlign())
21088 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21089 if (Op.isMemcpy())
21090 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21091 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21092 }
21093 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21094}
21095
21097 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21098 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21099 bool IsABIRegCopy = CC.has_value();
21100 EVT ValueVT = Val.getValueType();
21101 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21102 PartVT == MVT::f32) {
21103 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21104 // nan, and cast to f32.
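// Editorial example (illustrative, not part of the original source): the
// f16 value 1.0 (bits 0x3C00) is passed as the f32 bit pattern 0xFFFF3C00, a
// quiet NaN whose low 16 bits still hold the original half value.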
21105 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21106 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21107 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21108 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21109 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
21110 Parts[0] = Val;
21111 return true;
21112 }
21113
21114 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21115 LLVMContext &Context = *DAG.getContext();
21116 EVT ValueEltVT = ValueVT.getVectorElementType();
21117 EVT PartEltVT = PartVT.getVectorElementType();
21118 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21119 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21120 if (PartVTBitSize % ValueVTBitSize == 0) {
21121 assert(PartVTBitSize >= ValueVTBitSize);
21122 // If the element types are different, bitcast to the same element type of
21123 // PartVT first.
21124 // For example, to copy a <vscale x 1 x i8> value into
21125 // <vscale x 4 x i16>,
21126 // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an insert
21127 // subvector, and can then bitcast the result to <vscale x 4 x i16>.
21128 if (ValueEltVT != PartEltVT) {
21129 if (PartVTBitSize > ValueVTBitSize) {
21130 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21131 assert(Count != 0 && "The number of element should not be zero.");
21132 EVT SameEltTypeVT =
21133 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21134 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21135 DAG.getUNDEF(SameEltTypeVT), Val,
21136 DAG.getVectorIdxConstant(0, DL));
21137 }
21138 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21139 } else {
21140 Val =
21141 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21142 Val, DAG.getVectorIdxConstant(0, DL));
21143 }
21144 Parts[0] = Val;
21145 return true;
21146 }
21147 }
21148 return false;
21149}
21150
21152 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21153 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21154 bool IsABIRegCopy = CC.has_value();
21155 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21156 PartVT == MVT::f32) {
21157 SDValue Val = Parts[0];
21158
21159 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21160 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21161 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21162 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21163 return Val;
21164 }
21165
21166 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21167 LLVMContext &Context = *DAG.getContext();
21168 SDValue Val = Parts[0];
21169 EVT ValueEltVT = ValueVT.getVectorElementType();
21170 EVT PartEltVT = PartVT.getVectorElementType();
21171 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21172 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21173 if (PartVTBitSize % ValueVTBitSize == 0) {
21174 assert(PartVTBitSize >= ValueVTBitSize);
21175 EVT SameEltTypeVT = ValueVT;
21176 // If the element types are different, convert it to the same element type
21177 // of PartVT.
21178 // For example, to copy a <vscale x 1 x i8> value out of
21179 // <vscale x 4 x i16>,
21180 // we first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>,
21181 // and can then extract the <vscale x 1 x i8> subvector.
21182 if (ValueEltVT != PartEltVT) {
21183 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21184 assert(Count != 0 && "The number of element should not be zero.");
21185 SameEltTypeVT =
21186 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21187 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21188 }
21189 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21190 DAG.getVectorIdxConstant(0, DL));
21191 return Val;
21192 }
21193 }
21194 return SDValue();
21195}
21196
21198 // When aggressively optimizing for code size, we prefer to use a div
21199 // instruction, as it is usually smaller than the alternative sequence.
21200 // TODO: Add vector division?
21201 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21202 return OptSize && !VT.isVector();
21203}
21204
21206 // Scalarizing zero_ext and sign_ext can prevent them from matching widening
21207 // instructions in some situations.
21208 unsigned Opc = N->getOpcode();
21209 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21210 return false;
21211 return true;
21212}
21213
21214static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21215 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21216 Function *ThreadPointerFunc =
21217 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21218 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21219 IRB.CreateCall(ThreadPointerFunc), Offset);
21220}
21221
21223 // Fuchsia provides a fixed TLS slot for the stack cookie.
21224 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21225 if (Subtarget.isTargetFuchsia())
21226 return useTpOffset(IRB, -0x10);
21227
21228 // Android provides a fixed TLS slot for the stack cookie. See the definition
21229 // of TLS_SLOT_STACK_GUARD in
21230 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21231 if (Subtarget.isTargetAndroid())
21232 return useTpOffset(IRB, -0x18);
21233
21235}
21236
21238 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21239 const DataLayout &DL) const {
21240 EVT VT = getValueType(DL, VTy);
21241 // Don't lower vlseg/vsseg for vector types that can't be split.
21242 if (!isTypeLegal(VT))
21243 return false;
21244
21246 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21247 Alignment))
21248 return false;
21249
21250 MVT ContainerVT = VT.getSimpleVT();
21251
21252 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21253 if (!Subtarget.useRVVForFixedLengthVectors())
21254 return false;
21255 // Sometimes the interleaved access pass picks up splats as interleaves of
21256 // one element. Don't lower these.
21257 if (FVTy->getNumElements() < 2)
21258 return false;
21259
21261 } else {
21262 // The intrinsics for scalable vectors are not overloaded on pointer type
21263 // and can only handle the default address space.
21264 if (AddrSpace)
21265 return false;
21266 }
21267
21268 // Need to make sure that EMUL * NFIELDS ≤ 8
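// Editorial note (illustrative, not part of the original source): e.g. a
// factor-4 segment access on an LMUL=2 type uses 4 * 2 = 8 registers and is
// allowed, while the same factor on an LMUL=4 type would need 16 and is
// rejected.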
21269 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21270 if (Fractional)
21271 return true;
21272 return Factor * LMUL <= 8;
21273}
21274
21276 Align Alignment) const {
21277 if (!Subtarget.hasVInstructions())
21278 return false;
21279
21280 // Only support fixed vectors if we know the minimum vector size.
21281 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21282 return false;
21283
21284 EVT ScalarType = DataType.getScalarType();
21285 if (!isLegalElementTypeForRVV(ScalarType))
21286 return false;
21287
21288 if (!Subtarget.enableUnalignedVectorMem() &&
21289 Alignment < ScalarType.getStoreSize())
21290 return false;
21291
21292 return true;
21293}
21294
21296 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21297 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21298 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21299 Intrinsic::riscv_seg8_load};
21300
21301/// Lower an interleaved load into a vlsegN intrinsic.
21302///
21303/// E.g. Lower an interleaved load (Factor = 2):
21304/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21305/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21306/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21307///
21308/// Into:
21309/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21310/// %ptr, i64 4)
21311/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21312/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21315 ArrayRef<unsigned> Indices, unsigned Factor) const {
21316 IRBuilder<> Builder(LI);
21317
21318 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21319 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21321 LI->getDataLayout()))
21322 return false;
21323
21324 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21325
21326 Function *VlsegNFunc =
21328 {VTy, LI->getPointerOperandType(), XLenTy});
21329
21330 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21331
21332 CallInst *VlsegN =
21333 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21334
21335 for (unsigned i = 0; i < Shuffles.size(); i++) {
21336 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21337 Shuffles[i]->replaceAllUsesWith(SubVec);
21338 }
21339
21340 return true;
21341}
21342
21344 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21345 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21346 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21347 Intrinsic::riscv_seg8_store};
21348
21349/// Lower an interleaved store into a vssegN intrinsic.
21350///
21351/// E.g. Lower an interleaved store (Factor = 3):
21352/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21353/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21354/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21355///
21356/// Into:
21357/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21358/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21359/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21360/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21361/// %ptr, i32 4)
21362///
21363/// Note that the new shufflevectors will be removed and we'll only generate one
21364/// vsseg3 instruction in CodeGen.
21366 ShuffleVectorInst *SVI,
21367 unsigned Factor) const {
21368 IRBuilder<> Builder(SI);
21369 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21370 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21371 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21372 ShuffleVTy->getNumElements() / Factor);
21373 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21374 SI->getPointerAddressSpace(),
21375 SI->getDataLayout()))
21376 return false;
21377
21378 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21379
21380 Function *VssegNFunc =
21381 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21382 {VTy, SI->getPointerOperandType(), XLenTy});
21383
21384 auto Mask = SVI->getShuffleMask();
21386
21387 for (unsigned i = 0; i < Factor; i++) {
21388 Value *Shuffle = Builder.CreateShuffleVector(
21389 SVI->getOperand(0), SVI->getOperand(1),
21390 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21391 Ops.push_back(Shuffle);
21392 }
21393 // This VL should be OK (should be executable in one vsseg instruction,
21394 // potentially under larger LMULs) because we checked that the fixed vector
21395 // type fits in isLegalInterleavedAccessType
21396 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21397 Ops.append({SI->getPointerOperand(), VL});
21398
21399 Builder.CreateCall(VssegNFunc, Ops);
21400
21401 return true;
21402}
21403
21405 IntrinsicInst *DI, LoadInst *LI,
21406 SmallVectorImpl<Instruction *> &DeadInsts) const {
21407 assert(LI->isSimple());
21408 IRBuilder<> Builder(LI);
21409
21410 // Only deinterleave2 supported at present.
21411 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21412 return false;
21413
21414 unsigned Factor = 2;
21415
21416 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21417 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21418
21419 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21421 LI->getDataLayout()))
21422 return false;
21423
21424 Function *VlsegNFunc;
21425 Value *VL;
21426 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21428
21429 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21430 VlsegNFunc = Intrinsic::getDeclaration(
21431 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21432 {ResVTy, LI->getPointerOperandType(), XLenTy});
21433 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21434 } else {
21435 static const Intrinsic::ID IntrIds[] = {
21436 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21437 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21438 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21439 Intrinsic::riscv_vlseg8};
21440
21441 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21442 {ResVTy, XLenTy});
21443 VL = Constant::getAllOnesValue(XLenTy);
21444 Ops.append(Factor, PoisonValue::get(ResVTy));
21445 }
21446
21447 Ops.append({LI->getPointerOperand(), VL});
21448
21449 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21450 DI->replaceAllUsesWith(Vlseg);
21451
21452 return true;
21453}
21454
21457 SmallVectorImpl<Instruction *> &DeadInsts) const {
21458 assert(SI->isSimple());
21459 IRBuilder<> Builder(SI);
21460
21461 // Only interleave2 supported at present.
21462 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21463 return false;
21464
21465 unsigned Factor = 2;
21466
21467 VectorType *VTy = cast<VectorType>(II->getType());
21468 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21469
21470 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21471 SI->getPointerAddressSpace(),
21472 SI->getDataLayout()))
21473 return false;
21474
21475 Function *VssegNFunc;
21476 Value *VL;
21477 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21478
21479 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21480 VssegNFunc = Intrinsic::getDeclaration(
21481 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21482 {InVTy, SI->getPointerOperandType(), XLenTy});
21483 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21484 } else {
21485 static const Intrinsic::ID IntrIds[] = {
21486 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21487 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21488 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21489 Intrinsic::riscv_vsseg8};
21490
21491 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21492 {InVTy, XLenTy});
21493 VL = Constant::getAllOnesValue(XLenTy);
21494 }
21495
21496 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21497 SI->getPointerOperand(), VL});
21498
21499 return true;
21500}
21501
21505 const TargetInstrInfo *TII) const {
21506 assert(MBBI->isCall() && MBBI->getCFIType() &&
21507 "Invalid call instruction for a KCFI check");
21508 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21509 MBBI->getOpcode()));
21510
21511 MachineOperand &Target = MBBI->getOperand(0);
21512 Target.setIsRenamable(false);
21513
21514 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21515 .addReg(Target.getReg())
21516 .addImm(MBBI->getCFIType())
21517 .getInstr();
21518}
21519
21520#define GET_REGISTER_MATCHER
21521#include "RISCVGenAsmMatcher.inc"
21522
21525 const MachineFunction &MF) const {
21527 if (Reg == RISCV::NoRegister)
21529 if (Reg == RISCV::NoRegister)
21531 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21532 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21533 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21534 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21535 StringRef(RegName) + "\"."));
21536 return Reg;
21537}
21538
21541 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21542
21543 if (NontemporalInfo == nullptr)
21545
21546 // 1 (the default value) is treated as __RISCV_NTLH_ALL
21547 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21548 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21549 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21550 // 5 -> __RISCV_NTLH_ALL
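// Editorial note (illustrative, not part of the original source): e.g. level
// 5 (__RISCV_NTLH_ALL) becomes 3 after the subtraction below, setting both
// MONontemporalBit0 and MONontemporalBit1, while level 2
// (__RISCV_NTLH_INNERMOST_PRIVATE) becomes 0 and sets neither bit.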
21551 int NontemporalLevel = 5;
21552 const MDNode *RISCVNontemporalInfo =
21553 I.getMetadata("riscv-nontemporal-domain");
21554 if (RISCVNontemporalInfo != nullptr)
21555 NontemporalLevel =
21556 cast<ConstantInt>(
21557 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21558 ->getValue())
21559 ->getZExtValue();
21560
21561 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21562 "RISC-V target doesn't support this non-temporal domain.");
21563
21564 NontemporalLevel -= 2;
21566 if (NontemporalLevel & 0b1)
21567 Flags |= MONontemporalBit0;
21568 if (NontemporalLevel & 0b10)
21569 Flags |= MONontemporalBit1;
21570
21571 return Flags;
21572}
21573
21576
21577 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21579 TargetFlags |= (NodeFlags & MONontemporalBit0);
21580 TargetFlags |= (NodeFlags & MONontemporalBit1);
21581 return TargetFlags;
21582}
21583
21585 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21586 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21587}
21588
21590 if (VT.isScalableVector())
21591 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21592 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21593 return true;
21594 return Subtarget.hasStdExtZbb() &&
21595 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21596}
21597
21599 ISD::CondCode Cond) const {
21600 return isCtpopFast(VT) ? 0 : 1;
21601}
21602
21604
21605 // GISel support is in progress or complete for these opcodes.
21606 unsigned Op = Inst.getOpcode();
21607 if (Op == Instruction::Add || Op == Instruction::Sub ||
21608 Op == Instruction::And || Op == Instruction::Or ||
21609 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21610 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
21611 Op == Instruction::Freeze || Op == Instruction::Store)
21612 return false;
21613
21614 if (Inst.getType()->isScalableTy())
21615 return true;
21616
21617 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21618 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21619 !isa<ReturnInst>(&Inst))
21620 return true;
21621
21622 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21623 if (AI->getAllocatedType()->isScalableTy())
21624 return true;
21625 }
21626
21627 return false;
21628}
21629
21630SDValue
21631RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21632 SelectionDAG &DAG,
21633 SmallVectorImpl<SDNode *> &Created) const {
21635 if (isIntDivCheap(N->getValueType(0), Attr))
21636 return SDValue(N, 0); // Lower SDIV as SDIV
21637
21638 // Only perform this transform if short forward branch opt is supported.
21639 if (!Subtarget.hasShortForwardBranchOpt())
21640 return SDValue();
21641 EVT VT = N->getValueType(0);
21642 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21643 return SDValue();
21644
21645 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
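// Editorial note (illustrative, not part of the original source): e.g. a
// divisor of 1024 needs an addi of 1023, which fits in a signed 12-bit
// immediate, whereas a divisor of 4096 would need 4095 and is rejected
// below.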
21646 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21647 return SDValue();
21648 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21649}
21650
21651bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21652 EVT VT, const APInt &AndMask) const {
21653 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21654 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21656}
21657
21658unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21659 return Subtarget.getMinimumJumpTableEntries();
21660}
21661
21662// Handle single arg such as return value.
21663template <typename Arg>
21664void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21665 // This lambda determines whether an array of types consists of homogeneous
21666 // scalable vector types.
21667 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21668 // First, extract the first element in the argument type.
21669 auto It = ArgList.begin();
21670 MVT FirstArgRegType = It->VT;
21671
21672 // Return false if there is no element (e.g. no return value) or the type needs to be split.
21673 if (It == ArgList.end() || It->Flags.isSplit())
21674 return false;
21675
21676 ++It;
21677
21678 // Return false if this argument type contains only 1 element, or it's not a
21679 // scalable vector type.
21680 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21681 return false;
21682
21683 // Second, check if the following elements in this argument type are all the
21684 // same.
21685 for (; It != ArgList.end(); ++It)
21686 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21687 return false;
21688
21689 return true;
21690 };
21691
21692 if (isHomogeneousScalableVectorType(ArgList)) {
21693 // Handle as tuple type
21694 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21695 } else {
21696 // Handle as normal vector type
21697 bool FirstVMaskAssigned = false;
21698 for (const auto &OutArg : ArgList) {
21699 MVT RegisterVT = OutArg.VT;
21700
21701 // Skip non-RVV register type
21702 if (!RegisterVT.isVector())
21703 continue;
21704
21705 if (RegisterVT.isFixedLengthVector())
21706 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21707
21708 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21709 RVVArgInfos.push_back({1, RegisterVT, true});
21710 FirstVMaskAssigned = true;
21711 continue;
21712 }
21713
21714 RVVArgInfos.push_back({1, RegisterVT, false});
21715 }
21716 }
21717}
21718
21719// Handle multiple args.
21720template <>
21721void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21722 const DataLayout &DL = MF->getDataLayout();
21723 const Function &F = MF->getFunction();
21724 LLVMContext &Context = F.getContext();
21725
21726 bool FirstVMaskAssigned = false;
21727 for (Type *Ty : TypeList) {
21728 StructType *STy = dyn_cast<StructType>(Ty);
21729 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21730 Type *ElemTy = STy->getTypeAtIndex(0U);
21731 EVT VT = TLI->getValueType(DL, ElemTy);
21732 MVT RegisterVT =
21733 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21734 unsigned NumRegs =
21735 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21736
21737 RVVArgInfos.push_back(
21738 {NumRegs * STy->getNumElements(), RegisterVT, false});
21739 } else {
21740 SmallVector<EVT, 4> ValueVTs;
21741 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21742
21743 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21744 ++Value) {
21745 EVT VT = ValueVTs[Value];
21746 MVT RegisterVT =
21747 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21748 unsigned NumRegs =
21749 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21750
21751 // Skip non-RVV register type
21752 if (!RegisterVT.isVector())
21753 continue;
21754
21755 if (RegisterVT.isFixedLengthVector())
21756 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21757
21758 if (!FirstVMaskAssigned &&
21759 RegisterVT.getVectorElementType() == MVT::i1) {
21760 RVVArgInfos.push_back({1, RegisterVT, true});
21761 FirstVMaskAssigned = true;
21762 --NumRegs;
21763 }
21764
21765 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21766 }
21767 }
21768 }
21769}
21770
21771void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21772 unsigned StartReg) {
21773 assert((StartReg % LMul) == 0 &&
21774 "Start register number should be multiple of lmul");
21775 const MCPhysReg *VRArrays;
21776 switch (LMul) {
21777 default:
21778 report_fatal_error("Invalid lmul");
21779 case 1:
21780 VRArrays = ArgVRs;
21781 break;
21782 case 2:
21783 VRArrays = ArgVRM2s;
21784 break;
21785 case 4:
21786 VRArrays = ArgVRM4s;
21787 break;
21788 case 8:
21789 VRArrays = ArgVRM8s;
21790 break;
21791 }
21792
21793 for (unsigned i = 0; i < NF; ++i)
21794 if (StartReg)
21795 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21796 else
21797 AllocatedPhysRegs.push_back(MCPhysReg());
21798}
21799
21800/// This function determines whether each RVV argument is passed by register.
21801/// If the argument can be assigned to a VR, give it a specific register.
21802/// Otherwise, assign the argument 0, which is an invalid MCPhysReg.
21803void RVVArgDispatcher::compute() {
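// Editorial example (illustrative, not part of the original source; the
// exact register names are an assumption): a tuple argument with NF = 2
// whose element type occupies LMUL = 2 registers needs 2 * 2 = 4
// consecutive VRs starting at an even register number, so the first free
// slot is typically v8-v11 (argument vector registers start at v8).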
21804 uint32_t AssignedMap = 0;
21805 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21806 // Allocate first vector mask argument to V0.
21807 if (ArgInfo.FirstVMask) {
21808 AllocatedPhysRegs.push_back(RISCV::V0);
21809 return;
21810 }
21811
21812 unsigned RegsNeeded = divideCeil(
21813 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21814 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21815 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21816 StartReg += RegsNeeded) {
21817 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21818 if ((AssignedMap & Map) == 0) {
21819 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21820 AssignedMap |= Map;
21821 return;
21822 }
21823 }
21824
21825 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21826 };
21827
21828 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21829 allocate(RVVArgInfos[i]);
21830}
21831
21833 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21834 return AllocatedPhysRegs[CurIdx++];
21835}
21836
21839 int JTI,
21840 SelectionDAG &DAG) const {
21841 if (Subtarget.hasStdExtZicfilp()) {
21842 // When Zicfilp is enabled, we need to use a software-guarded branch for
21843 // jump table branches.
21844 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
21845 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
21846 Addr);
21847 }
21848 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
21849}
21850
21852
21853#define GET_RISCVVIntrinsicsTable_IMPL
21854#include "RISCVGenSearchableTables.inc"
21855
21856} // namespace llvm::RISCVVIntrinsicsTable
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
const SmallVectorImpl< MachineOperand > & Cond
const MachineOperand & RHS
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static constexpr int Concat[]
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1245
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1237
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1017
Class for arbitrary precision integers.
Definition: APInt.h:77
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:206
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1497
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1363
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1469
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1307
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1178
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:348
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1159
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:357
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:186
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:306
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1614
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1374
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:412
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:196
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1488
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1706
bool isMask(unsigned numBits) const
Definition: APInt.h:465
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:311
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1234
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:417
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:283
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1107
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:273
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1366
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:263
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:216
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1519
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1198
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:61
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:809
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:726
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:724
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:730
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:728
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
bool isFloatingPointOperation() const
Definition: Instructions.h:864
BinOp getOperation() const
Definition: Instructions.h:787
Value * getValOperand()
Definition: Instructions.h:856
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:829
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:367
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:842
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
iterator_range< arg_iterator > args()
Definition: Function.h:890
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
Argument * getArg(unsigned i) const
Definition: Function.h:884
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1896
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2536
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1851
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2053
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1766
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2514
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1871
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
Value * getPointerOperand()
Definition: Instructions.h:253
bool isSimple() const
Definition: Instructions.h:245
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:293
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:403
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
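The hooks listed above are the standard TargetLowering customization points; a backend overrides only the ones whose default behaviour does not match its ISA. A minimal, hypothetical sketch of one such override (illustrative only, not the actual RISC-V implementation; "MyTargetLowering" is an assumed class name):
  // Accept compare immediates that fit an assumed native 12-bit signed field.
  bool MyTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
    return isInt<12>(Imm); // llvm::isInt from llvm/Support/MathExtras.h
  }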
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node is an UNDEF node.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:390
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:500
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:892
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
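The SelectionDAG helpers above are the building blocks a lowering hook uses to emit new nodes. A minimal, hypothetical sketch (not code from this file) that compares a value against zero and selects between two constants:
  // Sketch: assumes it runs inside a lowering routine with a SelectionDAG and SDLoc at hand.
  SDValue lowerIsZeroSelect(SDValue X, const SDLoc &DL, SelectionDAG &DAG) {
    EVT VT = X.getValueType();
    EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
        DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);                    // constant node
    SDValue IsZero = DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETEQ); // X == 0
    return DAG.getSelect(DL, VT, IsZero, DAG.getConstant(1, DL, VT),
                         DAG.getAllOnesConstant(DL, VT));         // 1 : -1
  }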
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
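The static mask predicates above classify a shuffle mask without needing the instruction itself. A small sketch, assuming a caller that already has the mask as an ArrayRef<int>:
  // e.g. Mask = {3, 2, 1, 0} with NumSrcElts = 4 is a reverse; {0, 2, 1, 3} is not.
  static bool isSimpleShuffle(ArrayRef<int> Mask, unsigned NumSrcElts) {
    if (ShuffleVectorInst::isIdentityMask(Mask, NumSrcElts))
      return true; // no-op shuffle
    if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts))
      return true; // element order reversed
    SmallVector<unsigned> StartIndexes;
    return ShuffleVectorInst::isInterleaveMask(Mask, /*Factor=*/2,
                                               /*NumInputElts=*/NumSrcElts,
                                               StartIndexes);
  }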
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:502
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
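SmallSet, referenced above, is the usual container for de-duplicating a handful of values. A minimal sketch:
  SmallSet<int, 4> Seen;               // stays allocation-free up to 4 elements
  Seen.insert(7);
  bool WasNew = Seen.insert(7).second; // false: 7 is already present
  bool Present = Seen.count(7) == 1;   // true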
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:149
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
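StringSwitch, referenced above, maps string literals to values without a chain of if/else comparisons. A minimal sketch with hypothetical strings and values (not taken from the lowering code):
  enum class Ext { F, D, V, None };
  static Ext parseExt(StringRef Name) {
    return StringSwitch<Ext>(Name)
        .Case("f", Ext::F)
        .Case("d", Ext::D)
        .Cases("v", "vector", Ext::V)
        .Default(Ext::None);
  }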
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:423
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:600
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that ...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
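The setters above are the configuration surface a target's TargetLowering constructor uses to describe which operations and types are natively supported. A generic, hypothetical sketch (register classes and legalization choices are assumptions, not the RISC-V configuration):
  // Inside a hypothetical MyTargetLowering constructor:
  addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass);       // assumed register class
  setOperationAction(ISD::SDIV, MVT::i32, Expand);            // no divide unit
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);           // custom lowering
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Legal);  // native i8 sext load
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);            // no f64->f32 trunc store
  setBooleanContents(ZeroOrOneBooleanContent);
  computeRegisterProperties(Subtarget.getRegisterInfo());     // assumed Subtarget member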
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:245
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:372
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
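TypeSize and its accessors above distinguish fixed sizes from vscale-scaled sizes, which matters wherever scalable vector types appear. A short sketch:
  TypeSize Fixed = TypeSize::getFixed(128);             // exactly 128 bits
  TypeSize Scalable = TypeSize::getScalable(128);       // 128 x vscale bits
  uint64_t MinBits = Scalable.getKnownMinValue();       // 128 (lower bound)
  TypeSize Doubled = Scalable.multiplyCoefficientBy(2); // 256 x vscale bits
  // getFixedValue() may only be used when the size is not scalable.
  uint64_t Bits = Fixed.getFixedValue();                // 128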
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:826
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1194
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1190
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1407
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1355
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1440
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1337
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1223
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1339
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1340
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:840
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1425
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1429
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1296
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1301
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1439
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:963
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1335
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1336
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1480
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:935
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1256
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1422
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1289
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1426
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1056
@ STRICT_LROUND
Definition: ISDOpcodes.h:445
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:980
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1145
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1338
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1124
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:600
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:660
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1441
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1219
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1434
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1333
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1279
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:906
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1397
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1316
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1341
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1028
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1109
@ STRICT_LRINT
Definition: ISDOpcodes.h:447
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:605
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:938
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1367
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1442
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:443
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:972
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1331
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1047
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1332
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1250
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:650
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:448
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:626
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1330
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:919
@ STRICT_LLROUND
Definition: ISDOpcodes.h:446
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:905
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1430
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1214
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1138
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:594
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1052
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:831
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:835
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1565
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1565
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1552
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1486
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1648
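The ISD node predicates and CondCode helpers above are the usual entry points for DAG combines in this file. As a minimal, hedged sketch (the helper name is hypothetical; it assumes N is an ISD::SETCC node and that the SelectionDAG headers already included by this file are in scope):

// Hypothetical helper, not taken from RISCVISelLowering.cpp: rebuild a SETCC
// with the logically inverted condition code.
static SDValue invertSetCC(SDNode *N, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  ISD::CondCode InvCC =
      ISD::getSetCCInverse(CC, N->getOperand(0).getValueType());
  return DAG.getSetCC(SDLoc(N), N->getValueType(0), N->getOperand(0),
                      N->getOperand(1), InvCC);
}

Predicates such as ISD::isBuildVectorAllZeros or ISD::isConstantSplatVector are typically used the same way, as early guards before a combine rewrites a node.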
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
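To illustrate the PatternMatch helpers above, here is a hedged sketch (the function name is hypothetical, and it also uses m_ZeroMask from the same header, which is not listed here) that recognizes the canonical IR splat idiom of an insertelement fed into a zero-mask shufflevector:

// Hypothetical helper: match shufflevector(insertelement(undef, %x, 0), undef,
// zeroinitializer) and return the splatted scalar in Scalar.
static bool matchSplatOfScalar(Value *V, Value *&Scalar) {
  using namespace llvm::PatternMatch;
  return match(V,
               m_Shuffle(m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()),
                         m_Undef(), m_ZeroMask()));
}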
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SW_GUARDED_BRIND
Software guarded BRIND node.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
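As a hedged sketch of how the SEW/LMUL helpers above fit together (the expected values reflect my reading of the VTYPE encoding and are not asserted by this file):

// Assumed round-trip behaviour of the RISCVVType helpers from
// MCTargetDesc/RISCVBaseInfo.h.
static void sewLmulRoundTrip() {
  unsigned VSEW = RISCVVType::encodeSEW(32);    // SEW=32 should encode as 2
  unsigned SEW = RISCVVType::decodeVSEW(VSEW);  // and decode back to 32
  auto [LMul, Fractional] =
      RISCVVType::decodeVLMUL(RISCVII::VLMUL::LMUL_F2);
  // Expected: LMul == 2 and Fractional == true, i.e. LMUL = 1/2 (mf2).
  (void)SEW; (void)LMul; (void)Fractional;
}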
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
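For example, a lowering path that must fall back to a runtime routine can query these RTLIB helpers; a minimal hedged sketch (assuming the RuntimeLibcalls declarations already included by this file):

// Hypothetical usage: look up the libcall for an f64 -> i32 signed conversion.
static RTLIB::Libcall fpToSIntLibcall() {
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "no libcall for this conversion");
  return LC;
}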
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2431
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:296
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1535
unsigned Log2_64(uint64_t Value)
Return the floor of the log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count the number of 0 bits from the least significant bit toward the most significant, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1935
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor of the log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
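Several of the bit-manipulation helpers above recur throughout the lowering code; the following hedged sketch records their expected behaviour (the values follow from the documented semantics; it assumes <cassert> and `using namespace llvm`, as in this file):

// Expected behaviour of the MathExtras / bit.h helpers referenced above.
static void bitMathExamples() {
  assert(isPowerOf2_64(64) && !isPowerOf2_64(0)); // power of two > 0 only
  assert(Log2_64(64) == 6 && Log2_32(40) == 5);   // floor log base 2
  assert(PowerOf2Ceil(33) == 64);                 // round up to a power of two
  assert(countr_zero(0b101000u) == 3);            // trailing zero bits
  assert(bit_width(5u) == 3);                     // bits needed to hold 5
  assert(isMask_64(0xFF));                        // contiguous ones from bit 0
  assert(divideCeil(10, 4) == 3);                 // integer ceiling division
}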
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1928
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
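The range helpers above all come from llvm/ADT/STLExtras.h; a short hedged sketch of typical usage (the data and lambdas are illustrative only, and `using namespace llvm` is assumed):

// Illustrative use of the STLExtras range helpers.
static void rangeHelperExamples() {
  SmallVector<int> Vals = {1, 2, 4, 8};
  bool AnyOdd = any_of(Vals, [](int V) { return (V & 1) != 0; });  // true
  auto NumBig = count_if(Vals, [](int V) { return V > 2; });       // 2
  bool HasFour = is_contained(Vals, 4);                            // true
  SmallVector<int> Doubled(Vals.size());
  transform(Vals, Doubled.begin(), [](int V) { return 2 * V; });   // {2,4,8,16}
  for (auto [Idx, V] : enumerate(Vals)) {
    (void)Idx; (void)V;  // yields (0,1), (1,2), (2,4), (3,8)
  }
  (void)AnyOdd; (void)NumBig; (void)HasFour;
}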
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:253
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:329
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:381
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:388
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:275
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:291
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:341
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:416
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isFixedLengthVector() const
Definition: ValueTypes.h:178
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:405
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:314
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:299
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
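The EVT queries above are the main type-introspection surface used during legalization. As a hedged sketch, a hypothetical helper that derives the integer EVT with the same shape as a given type (for vectors this mirrors what changeVectorElementTypeToInteger provides; it uses the ElementCount overload of EVT::getVectorVT):

// Hypothetical helper: integer EVT with the same shape as VT.
static EVT sameShapeIntegerVT(LLVMContext &Ctx, EVT VT) {
  EVT EltVT = EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits());
  if (!VT.isVector())
    return EltVT;
  // Preserves both fixed and scalable element counts.
  return EVT::getVectorVT(Ctx, EltVT, VT.getVectorElementCount());
}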
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1042
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:263
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1002
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:269
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
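The KnownBits queries above are the building blocks a target's computeKnownBitsForTargetNode combines; a hedged sketch of the flavour of reasoning (the values are illustrative and the expected result follows from the documented semantics):

// Illustrative KnownBits reasoning; not taken from this file.
static unsigned knownBitsExample() {
  KnownBits LHS(8), RHS(8);
  LHS.Zero.setHighBits(4);                    // LHS known to fit in 4 bits
  RHS.One.setBit(0);                          // RHS known to be odd
  KnownBits Rem = KnownBits::urem(LHS, RHS);  // keeps LHS's leading zeros
  KnownBits Wide = Rem.zext(16);              // new high bits are known zero
  return Wide.countMaxActiveBits();           // expected to be at most 4
}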
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)