RISCVISelLowering.cpp
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
55static cl::opt<unsigned> ExtensionMaxWebSize(
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
74 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
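// Note: since DEBUG_TYPE is "riscv-lower", these knobs are exposed as hidden
// command-line flags, e.g. "-riscv-lower-ext-max-web-size=<N>" for the web
// size option above.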
78
79RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
80 const RISCVSubtarget &STI)
81 : TargetLowering(TM), Subtarget(STI) {
82
83 RISCVABI::ABI ABI = Subtarget.getTargetABI();
84 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
85
86 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
87 !Subtarget.hasStdExtF()) {
88 errs() << "Hard-float 'f' ABI can't be used for a target that "
89 "doesn't support the F instruction set extension (ignoring "
90 "target-abi)\n";
92 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
93 !Subtarget.hasStdExtD()) {
94 errs() << "Hard-float 'd' ABI can't be used for a target that "
95 "doesn't support the D instruction set extension (ignoring "
96 "target-abi)\n";
98 }
99
100 switch (ABI) {
101 default:
102 report_fatal_error("Don't know how to lower this ABI");
111 break;
112 }
113
114 MVT XLenVT = Subtarget.getXLenVT();
115
116 // Set up the register classes.
117 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
118
119 if (Subtarget.hasStdExtZfhmin())
120 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
121 if (Subtarget.hasStdExtZfbfmin())
122 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtF())
124 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
125 if (Subtarget.hasStdExtD())
126 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
127 if (Subtarget.hasStdExtZhinxmin())
128 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
129 if (Subtarget.hasStdExtZfinx())
130 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
131 if (Subtarget.hasStdExtZdinx()) {
132 if (Subtarget.is64Bit())
133 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
134 else
135 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
136 }
137
138 static const MVT::SimpleValueType BoolVecVTs[] = {
139 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
140 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
141 static const MVT::SimpleValueType IntVecVTs[] = {
142 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
143 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
144 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
145 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
146 MVT::nxv4i64, MVT::nxv8i64};
147 static const MVT::SimpleValueType F16VecVTs[] = {
148 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
149 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
150 static const MVT::SimpleValueType BF16VecVTs[] = {
151 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
152 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
153 static const MVT::SimpleValueType F32VecVTs[] = {
154 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
155 static const MVT::SimpleValueType F64VecVTs[] = {
156 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
157
158 if (Subtarget.hasVInstructions()) {
159 auto addRegClassForRVV = [this](MVT VT) {
160 // Disable the smallest fractional LMUL types if ELEN is less than
161 // RVVBitsPerBlock.
162 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
163 if (VT.getVectorMinNumElements() < MinElts)
164 return;
165
166 unsigned Size = VT.getSizeInBits().getKnownMinValue();
167 const TargetRegisterClass *RC;
168 if (Size <= RISCV::RVVBitsPerBlock)
169 RC = &RISCV::VRRegClass;
170 else if (Size == 2 * RISCV::RVVBitsPerBlock)
171 RC = &RISCV::VRM2RegClass;
172 else if (Size == 4 * RISCV::RVVBitsPerBlock)
173 RC = &RISCV::VRM4RegClass;
174 else if (Size == 8 * RISCV::RVVBitsPerBlock)
175 RC = &RISCV::VRM8RegClass;
176 else
177 llvm_unreachable("Unexpected size");
178
179 addRegisterClass(VT, RC);
180 };
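// Illustrative examples (with RISCV::RVVBitsPerBlock == 64): nxv1i8 has a
// known minimum size of 8 bits, so it lands in the plain VR class, while
// nxv16i32 (16 x 32 = 512 bits = 8 x 64) needs the LMUL=8 class VRM8.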
181
182 for (MVT VT : BoolVecVTs)
183 addRegClassForRVV(VT);
184 for (MVT VT : IntVecVTs) {
185 if (VT.getVectorElementType() == MVT::i64 &&
186 !Subtarget.hasVInstructionsI64())
187 continue;
188 addRegClassForRVV(VT);
189 }
190
191 if (Subtarget.hasVInstructionsF16Minimal())
192 for (MVT VT : F16VecVTs)
193 addRegClassForRVV(VT);
194
195 if (Subtarget.hasVInstructionsBF16Minimal())
196 for (MVT VT : BF16VecVTs)
197 addRegClassForRVV(VT);
198
199 if (Subtarget.hasVInstructionsF32())
200 for (MVT VT : F32VecVTs)
201 addRegClassForRVV(VT);
202
203 if (Subtarget.hasVInstructionsF64())
204 for (MVT VT : F64VecVTs)
205 addRegClassForRVV(VT);
206
207 if (Subtarget.useRVVForFixedLengthVectors()) {
208 auto addRegClassForFixedVectors = [this](MVT VT) {
209 MVT ContainerVT = getContainerForFixedLengthVector(VT);
210 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
211 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
212 addRegisterClass(VT, TRI.getRegClass(RCID));
213 };
214 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
215 if (useRVVForFixedLengthVectorVT(VT))
216 addRegClassForFixedVectors(VT);
217
218 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
219 if (useRVVForFixedLengthVectorVT(VT))
220 addRegClassForFixedVectors(VT);
221 }
222 }
223
224 // Compute derived properties from the register classes.
226
228
230 MVT::i1, Promote);
231 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
233 MVT::i1, Promote);
234
235 // TODO: add all necessary setOperationAction calls.
237
242
247 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
250 }
251
253
256
258
260
261 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
262 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
263 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
264
265 if (Subtarget.is64Bit()) {
267
270 MVT::i32, Custom);
272 if (!Subtarget.hasStdExtZbb())
275 Custom);
277 }
278 if (!Subtarget.hasStdExtZmmul()) {
280 } else if (Subtarget.is64Bit()) {
283 } else {
285 }
286
287 if (!Subtarget.hasStdExtM()) {
289 Expand);
290 } else if (Subtarget.is64Bit()) {
292 {MVT::i8, MVT::i16, MVT::i32}, Custom);
293 }
294
297 Expand);
298
300 Custom);
301
302 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
303 if (Subtarget.is64Bit())
305 } else if (Subtarget.hasVendorXTHeadBb()) {
306 if (Subtarget.is64Bit())
309 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
311 } else {
313 }
314
315 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
316 // pattern match it directly in isel.
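// (e.g. an XLen-wide ISD::BSWAP then selects directly to a single rev8.)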
318 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
319 Subtarget.hasVendorXTHeadBb())
320 ? Legal
321 : Expand);
322
323 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
325 } else {
326 // Zbkb can use rev8+brev8 to implement bitreverse.
328 Subtarget.hasStdExtZbkb() ? Custom : Expand);
329 }
330
331 if (Subtarget.hasStdExtZbb() ||
332 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
334 Legal);
335 }
336
337 if (Subtarget.hasStdExtZbb() ||
338 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
339 if (Subtarget.is64Bit())
341 } else {
343 }
344
345 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
346 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
347 // We need the custom lowering to make sure that the resulting sequence
348 // for the 32-bit case is efficient on 64-bit targets.
349 if (Subtarget.is64Bit())
351 } else {
353 }
354
355 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
357 } else if (Subtarget.hasShortForwardBranchOpt()) {
358 // We can use PseudoCCSUB to implement ABS.
360 } else if (Subtarget.is64Bit()) {
362 }
363
364 if (!Subtarget.hasVendorXTHeadCondMov())
366
367 static const unsigned FPLegalNodeTypes[] = {
375
376 static const ISD::CondCode FPCCToExpand[] = {
380
381 static const unsigned FPOpToExpand[] = {
383 ISD::FREM};
384
385 static const unsigned FPRndMode[] = {
388
389 if (Subtarget.hasStdExtZfhminOrZhinxmin())
391
392 static const unsigned ZfhminZfbfminPromoteOps[] = {
407
408 if (Subtarget.hasStdExtZfbfmin()) {
417 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
419 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
420 // DAGCombiner::visitFP_ROUND probably needs improvements first.
422 }
423
424 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
425 if (Subtarget.hasStdExtZfhOrZhinx()) {
426 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
427 setOperationAction(FPRndMode, MVT::f16,
428 Subtarget.hasStdExtZfa() ? Legal : Custom);
431 } else {
432 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
435 MVT::f16, Legal);
436 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
437 // DAGCombiner::visitFP_ROUND probably needs improvements first.
439 }
440
443 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
446
448 Subtarget.hasStdExtZfa() ? Legal : Promote);
453 MVT::f16, Promote);
454
455 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
456 // complete support for all operations in LegalizeDAG.
461 MVT::f16, Promote);
462
463 // We need to custom promote this.
464 if (Subtarget.is64Bit())
466
468 Subtarget.hasStdExtZfa() ? Legal : Custom);
469 }
470
471 if (Subtarget.hasStdExtFOrZfinx()) {
472 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
473 setOperationAction(FPRndMode, MVT::f32,
474 Subtarget.hasStdExtZfa() ? Legal : Custom);
475 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
479 setOperationAction(FPOpToExpand, MVT::f32, Expand);
480 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
481 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
482 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
483 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
487 Subtarget.isSoftFPABI() ? LibCall : Custom);
490
491 if (Subtarget.hasStdExtZfa()) {
494 } else {
496 }
497 }
498
499 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
501
502 if (Subtarget.hasStdExtDOrZdinx()) {
503 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
504
505 if (!Subtarget.is64Bit())
507
508 if (Subtarget.hasStdExtZfa()) {
509 setOperationAction(FPRndMode, MVT::f64, Legal);
512 } else {
513 if (Subtarget.is64Bit())
514 setOperationAction(FPRndMode, MVT::f64, Custom);
515
517 }
518
521 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
525 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
526 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
527 setOperationAction(FPOpToExpand, MVT::f64, Expand);
528 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
529 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
530 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
531 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
535 Subtarget.isSoftFPABI() ? LibCall : Custom);
538 }
539
540 if (Subtarget.is64Bit()) {
543 MVT::i32, Custom);
545 }
546
547 if (Subtarget.hasStdExtFOrZfinx()) {
549 Custom);
550
553 XLenVT, Legal);
554
557 }
558
561 XLenVT, Custom);
562
564
565 if (Subtarget.is64Bit())
567
568 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
569 // Unfortunately this can't be determined just from the ISA naming string.
571 Subtarget.is64Bit() ? Legal : Custom);
573 Subtarget.is64Bit() ? Legal : Custom);
574
577 if (Subtarget.is64Bit())
579
580 if (Subtarget.hasStdExtZicbop()) {
582 }
583
584 if (Subtarget.hasStdExtA()) {
586 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
588 else
590 } else if (Subtarget.hasForcedAtomics()) {
592 } else {
594 }
595
597
599
600 if (getTargetMachine().getTargetTriple().isOSLinux()) {
601 // Custom lowering of llvm.clear_cache.
603 }
604
605 if (Subtarget.hasVInstructions()) {
607
609
610 // RVV intrinsics may have illegal operands.
611 // We also need to custom legalize vmv.x.s.
614 {MVT::i8, MVT::i16}, Custom);
615 if (Subtarget.is64Bit())
617 MVT::i32, Custom);
618 else
620 MVT::i64, Custom);
621
623 MVT::Other, Custom);
624
625 static const unsigned IntegerVPOps[] = {
626 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
627 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
628 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
629 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
630 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
631 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
632 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
633 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
634 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
635 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
636 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
637 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
638 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
639 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
640 ISD::EXPERIMENTAL_VP_SPLAT};
641
642 static const unsigned FloatingPointVPOps[] = {
643 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
644 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
645 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
646 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
647 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
648 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
649 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
650 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
651 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
652 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
653 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
654 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
655 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
656 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
657
658 static const unsigned IntegerVecReduceOps[] = {
662
663 static const unsigned FloatingPointVecReduceOps[] = {
666
667 if (!Subtarget.is64Bit()) {
668 // We must custom-lower certain vXi64 operations on RV32 due to the vector
669 // element type being illegal.
671 MVT::i64, Custom);
672
673 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
674
675 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
676 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
677 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
678 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
679 MVT::i64, Custom);
680 }
681
682 for (MVT VT : BoolVecVTs) {
683 if (!isTypeLegal(VT))
684 continue;
685
687
688 // Mask VTs are custom-expanded into a series of standard nodes
692 VT, Custom);
693
695 Custom);
696
699 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
700 Expand);
701
702 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
703 Custom);
704
705 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
706
709 Custom);
710
712 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
713 Custom);
714
715 // RVV has native int->float & float->int conversions where the
716 // element type sizes are within one power-of-two of each other. Any
717 // wider distances between type sizes have to be lowered as sequences
718 // which progressively narrow the gap in stages.
723 VT, Custom);
725 Custom);
726
727 // Expand all extending loads to types larger than this, and truncating
728 // stores from types larger than this.
730 setTruncStoreAction(VT, OtherVT, Expand);
732 OtherVT, Expand);
733 }
734
735 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
736 ISD::VP_TRUNCATE, ISD::VP_SETCC},
737 VT, Custom);
738
741
743
744 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
745 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
746
749 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
750 }
751
752 for (MVT VT : IntVecVTs) {
753 if (!isTypeLegal(VT))
754 continue;
755
758
759 // Vectors implement MULHS/MULHU.
761
762 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
763 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
765
767 Legal);
768
770
771 // Custom-lower extensions and truncations from/to mask types.
773 VT, Custom);
774
775 // RVV has native int->float & float->int conversions where the
776 // element type sizes are within one power-of-two of each other. Any
777 // wider distances between type sizes have to be lowered as sequences
778 // which progressively narrow the gap in stages.
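// (For example, nxv2i8 <-> nxv2f64 is 8 bits versus 64 bits, more than one
// power-of-two apart, so it cannot be done with a single conversion
// instruction and is instead lowered in multiple steps.)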
783 VT, Custom);
785 Custom);
789 VT, Legal);
790
791 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
792 // nodes which truncate by one power of two at a time.
795 Custom);
796
797 // Custom-lower insert/extract operations to simplify patterns.
799 Custom);
800
801 // Custom-lower reduction operations to set up the corresponding custom
802 // nodes' operands.
803 setOperationAction(IntegerVecReduceOps, VT, Custom);
804
805 setOperationAction(IntegerVPOps, VT, Custom);
806
808
810 VT, Custom);
811
813 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
814 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
815 VT, Custom);
816
819 VT, Custom);
820
823
825
827 setTruncStoreAction(VT, OtherVT, Expand);
829 OtherVT, Expand);
830 }
831
834
835 // Splice
837
838 if (Subtarget.hasStdExtZvkb()) {
840 setOperationAction(ISD::VP_BSWAP, VT, Custom);
841 } else {
842 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
844 }
845
846 if (Subtarget.hasStdExtZvbb()) {
848 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
849 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
850 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
851 VT, Custom);
852 } else {
853 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
855 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
856 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
857 VT, Expand);
858
859 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
860 // is in the range of f32.
861 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
862 if (isTypeLegal(FloatVT)) {
864 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
865 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
866 VT, Custom);
867 }
868 }
869 }
870
871 // Expand various CCs to best match the RVV ISA, which natively supports UNE
872 // but no other unordered comparisons, and supports all ordered comparisons
873 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
874 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
875 // and we pattern-match those back to the "original", swapping operands once
876 // more. This way we catch both operations and both "vf" and "fv" forms with
877 // fewer patterns.
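// For example, (setogt x, y) is expanded here to (setolt y, x); the isel
// patterns then recognize the swapped form, so a single set of patterns
// covers both directions and both vector-scalar operand orders.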
878 static const ISD::CondCode VFPCCToExpand[] = {
882 };
883
884 // TODO: support more ops.
885 static const unsigned ZvfhminPromoteOps[] = {
893
894 // TODO: support more vp ops.
895 static const unsigned ZvfhminPromoteVPOps[] = {
896 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
897 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
898 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
899 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
900 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
901 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
902 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
903 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
904 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
905
906 // Sets common operation actions on RVV floating-point vector types.
907 const auto SetCommonVFPActions = [&](MVT VT) {
909 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
910 // sizes are within one power-of-two of each other. Therefore conversions
911 // between vXf16 and vXf64 must be lowered as sequences which convert via
912 // vXf32.
915 // Custom-lower insert/extract operations to simplify patterns.
917 Custom);
918 // Expand various condition codes (explained above).
919 setCondCodeAction(VFPCCToExpand, VT, Expand);
920
923
927 VT, Custom);
928
929 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
930
931 // Expand FP operations that need libcalls.
943
945
947
949 VT, Custom);
950
952 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
953 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
954 VT, Custom);
955
958
961 VT, Custom);
962
965
967
968 setOperationAction(FloatingPointVPOps, VT, Custom);
969
971 Custom);
974 VT, Legal);
979 VT, Custom);
980 };
981
982 // Sets common extload/truncstore actions on RVV floating-point vector
983 // types.
984 const auto SetCommonVFPExtLoadTruncStoreActions =
985 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
986 for (auto SmallVT : SmallerVTs) {
987 setTruncStoreAction(VT, SmallVT, Expand);
988 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
989 }
990 };
991
992 if (Subtarget.hasVInstructionsF16()) {
993 for (MVT VT : F16VecVTs) {
994 if (!isTypeLegal(VT))
995 continue;
996 SetCommonVFPActions(VT);
997 }
998 } else if (Subtarget.hasVInstructionsF16Minimal()) {
999 for (MVT VT : F16VecVTs) {
1000 if (!isTypeLegal(VT))
1001 continue;
1004 Custom);
1005 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1006 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1007 Custom);
1010 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1011 VT, Custom);
1014 VT, Custom);
1015 if (Subtarget.hasStdExtZfhmin())
1017 // load/store
1019
1020 // Custom split nxv32f16 since nxv32f32 is not legal.
1021 if (VT == MVT::nxv32f16) {
1022 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1023 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1024 continue;
1025 }
1026 // Add more promote ops.
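// With Zvfhmin only the f16<->f32 conversions are available, so these f16
// vector ops are promoted: operands are widened to an f32 vector, the
// operation is performed in f32, and the result is narrowed back to f16.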
1027 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1028 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1029 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1030 }
1031 }
1032
1033 // TODO: Could we merge some code with zvfhmin?
1034 if (Subtarget.hasVInstructionsBF16Minimal()) {
1035 for (MVT VT : BF16VecVTs) {
1036 if (!isTypeLegal(VT))
1037 continue;
1039 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1041 Custom);
1044 VT, Custom);
1046 if (Subtarget.hasStdExtZfbfmin())
1048 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1049 Custom);
1051 // TODO: Promote to fp32.
1052 }
1053 }
1054
1055 if (Subtarget.hasVInstructionsF32()) {
1056 for (MVT VT : F32VecVTs) {
1057 if (!isTypeLegal(VT))
1058 continue;
1059 SetCommonVFPActions(VT);
1060 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1061 }
1062 }
1063
1064 if (Subtarget.hasVInstructionsF64()) {
1065 for (MVT VT : F64VecVTs) {
1066 if (!isTypeLegal(VT))
1067 continue;
1068 SetCommonVFPActions(VT);
1069 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1070 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1071 }
1072 }
1073
1074 if (Subtarget.useRVVForFixedLengthVectors()) {
1075 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1076 if (!useRVVForFixedLengthVectorVT(VT))
1077 continue;
1078
1079 // By default everything must be expanded.
1080 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1083 setTruncStoreAction(VT, OtherVT, Expand);
1085 OtherVT, Expand);
1086 }
1087
1088 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1089 // expansion to a build_vector of 0s.
1091
1092 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1094 Custom);
1095
1098 Custom);
1099
1101 VT, Custom);
1102
1104
1106
1108
1110
1113 Custom);
1114
1116
1119 Custom);
1120
1122 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1123 Custom);
1124
1126 {
1135 },
1136 VT, Custom);
1138 Custom);
1139
1141
1142 // Operations below are different between mask vectors and other vectors.
1143 if (VT.getVectorElementType() == MVT::i1) {
1144 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1145 ISD::OR, ISD::XOR},
1146 VT, Custom);
1147
1148 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1149 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1150 VT, Custom);
1151
1152 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1153 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1154 continue;
1155 }
1156
1157 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1158 // it before type legalization for i64 vectors on RV32. It will then be
1159 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1160 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1161 // improvements first.
1162 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1165 }
1166
1169
1170 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1171 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1172 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1173 ISD::VP_SCATTER},
1174 VT, Custom);
1175
1179 VT, Custom);
1180
1183
1185
1186 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1187 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1189
1193 VT, Custom);
1194
1196
1199
1200 // Custom-lower reduction operations to set up the corresponding custom
1201 // nodes' operands.
1205 VT, Custom);
1206
1207 setOperationAction(IntegerVPOps, VT, Custom);
1208
1209 if (Subtarget.hasStdExtZvkb())
1211
1212 if (Subtarget.hasStdExtZvbb()) {
1215 VT, Custom);
1216 } else {
1217 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
1218 // is in the range of f32.
1219 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1220 if (isTypeLegal(FloatVT))
1223 Custom);
1224 }
1225 }
1226
1227 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1228 // There are no extending loads or truncating stores.
1229 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1230 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1231 setTruncStoreAction(VT, InnerVT, Expand);
1232 }
1233
1234 if (!useRVVForFixedLengthVectorVT(VT))
1235 continue;
1236
1237 // By default everything must be expanded.
1238 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1240
1241 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1242 // expansion to a build_vector of 0s.
1244
1247 VT, Custom);
1248
1249 // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
1250 // vp_stride_load/store, vp_gather/scatter can be hoisted to here.
1252
1255 Custom);
1256
1257 if (VT.getVectorElementType() == MVT::f16 &&
1258 !Subtarget.hasVInstructionsF16()) {
1259 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1261 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1262 Custom);
1264 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1265 VT, Custom);
1267 if (Subtarget.hasStdExtZfhmin()) {
1268 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1270 } else {
1271 // We need to custom legalize f16 build vectors if Zfhmin isn't
1272 // available.
1274 }
1275 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1276 // Don't promote f16 vector operations to f32 if f32 vector type is
1277 // not legal.
1278 // TODO: could split the f16 vector into two vectors and do promotion.
1279 if (!isTypeLegal(F32VecVT))
1280 continue;
1281 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1282 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1283 continue;
1284 }
1285
1286 if (VT.getVectorElementType() == MVT::bf16) {
1287 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1288 // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
1291 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1292 Custom);
1293 // TODO: Promote to fp32.
1294 continue;
1295 }
1296
1299 VT, Custom);
1300
1303
1304 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1305 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1306 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1307 ISD::VP_SCATTER},
1308 VT, Custom);
1309
1314 VT, Custom);
1315
1318 VT, Custom);
1319
1320 setCondCodeAction(VFPCCToExpand, VT, Expand);
1321
1324
1326
1327 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1328
1329 setOperationAction(FloatingPointVPOps, VT, Custom);
1330
1337 VT, Custom);
1338 }
1339
1340 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1341 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1342 Custom);
1343 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1345 if (Subtarget.hasStdExtFOrZfinx())
1347 if (Subtarget.hasStdExtDOrZdinx())
1349 }
1350 }
1351
1352 if (Subtarget.hasStdExtA())
1354
1355 if (Subtarget.hasForcedAtomics()) {
1356 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
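// (e.g. an i32 atomicrmw add then becomes a call such as __sync_fetch_and_add_4.)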
1362 XLenVT, LibCall);
1363 }
1364
1365 if (Subtarget.hasVendorXTHeadMemIdx()) {
1366 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1367 setIndexedLoadAction(im, MVT::i8, Legal);
1368 setIndexedStoreAction(im, MVT::i8, Legal);
1369 setIndexedLoadAction(im, MVT::i16, Legal);
1370 setIndexedStoreAction(im, MVT::i16, Legal);
1371 setIndexedLoadAction(im, MVT::i32, Legal);
1372 setIndexedStoreAction(im, MVT::i32, Legal);
1373
1374 if (Subtarget.is64Bit()) {
1375 setIndexedLoadAction(im, MVT::i64, Legal);
1376 setIndexedStoreAction(im, MVT::i64, Legal);
1377 }
1378 }
1379 }
1380
1381 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1385
1389 }
1390
1391 // Function alignments.
1392 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1393 setMinFunctionAlignment(FunctionAlignment);
1394 // Set preferred alignments.
1397
1402
1403 if (Subtarget.hasStdExtFOrZfinx())
1405
1406 if (Subtarget.hasStdExtZbb())
1408
1409 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1410 Subtarget.hasVInstructions())
1412
1413 if (Subtarget.hasStdExtZbkb())
1415 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1417 if (Subtarget.hasStdExtFOrZfinx())
1420 if (Subtarget.hasVInstructions())
1422 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1425 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1428 if (Subtarget.hasVendorXTHeadMemPair())
1430 if (Subtarget.useRVVForFixedLengthVectors())
1432
1433 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1434 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1435
1436 // Disable strict node mutation.
1437 IsStrictFPEnabled = true;
1438 EnableExtLdPromotion = true;
1439
1440 // Let the subtarget decide if a predictable select is more expensive than the
1441 // corresponding branch. This information is used in CGP/SelectOpt to decide
1442 // when to convert selects into branches.
1443 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1444}
1445
1446EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1447 LLVMContext &Context,
1448 EVT VT) const {
1449 if (!VT.isVector())
1450 return getPointerTy(DL);
1451 if (Subtarget.hasVInstructions() &&
1452 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1453 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1454 return VT.changeVectorElementTypeToInteger();
1455}
1456
1457MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1458 return Subtarget.getXLenVT();
1459}
1460
1461// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1462bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1463 unsigned VF,
1464 bool IsScalable) const {
1465 if (!Subtarget.hasVInstructions())
1466 return true;
1467
1468 if (!IsScalable)
1469 return true;
1470
1471 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1472 return true;
1473
1474 // Don't allow VF=1 if those types aren't legal.
1475 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1476 return true;
1477
1478 // VLEN=32 support is incomplete.
1479 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1480 return true;
1481
1482 // The maximum VF is for the smallest element width with LMUL=8.
1483 // VF must be a power of 2.
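// (With RVVBitsPerBlock == 64 this evaluates to 64, i.e. the VF of an
// SEW=8, LMUL=8 vector.)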
1484 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1485 return VF > MaxVF || !isPowerOf2_32(VF);
1486}
1487
1489 return !Subtarget.hasVInstructions() ||
1490 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1491}
1492
1493bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1494 const CallInst &I,
1495 MachineFunction &MF,
1496 unsigned Intrinsic) const {
1497 auto &DL = I.getDataLayout();
1498
1499 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1500 bool IsUnitStrided, bool UsePtrVal = false) {
1502 // We can't use ptrVal if the intrinsic can access memory before the
1503 // pointer. This means we can't use it for strided or indexed intrinsics.
1504 if (UsePtrVal)
1505 Info.ptrVal = I.getArgOperand(PtrOp);
1506 else
1507 Info.fallbackAddressSpace =
1508 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1509 Type *MemTy;
1510 if (IsStore) {
1511 // Store value is the first operand.
1512 MemTy = I.getArgOperand(0)->getType();
1513 } else {
1514 // Use the return type. If it's a segment load, the return type is a struct.
1515 MemTy = I.getType();
1516 if (MemTy->isStructTy())
1517 MemTy = MemTy->getStructElementType(0);
1518 }
1519 if (!IsUnitStrided)
1520 MemTy = MemTy->getScalarType();
1521
1522 Info.memVT = getValueType(DL, MemTy);
1523 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1525 Info.flags |=
1527 return true;
1528 };
1529
1530 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1532
1534 switch (Intrinsic) {
1535 default:
1536 return false;
1537 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1538 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1539 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1540 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1541 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1542 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1543 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1545 case Intrinsic::riscv_masked_cmpxchg_i32:
1547 Info.memVT = MVT::i32;
1548 Info.ptrVal = I.getArgOperand(0);
1549 Info.offset = 0;
1550 Info.align = Align(4);
1553 return true;
1554 case Intrinsic::riscv_seg2_load:
1555 case Intrinsic::riscv_seg3_load:
1556 case Intrinsic::riscv_seg4_load:
1557 case Intrinsic::riscv_seg5_load:
1558 case Intrinsic::riscv_seg6_load:
1559 case Intrinsic::riscv_seg7_load:
1560 case Intrinsic::riscv_seg8_load:
1561 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1562 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1563 case Intrinsic::riscv_seg2_store:
1564 case Intrinsic::riscv_seg3_store:
1565 case Intrinsic::riscv_seg4_store:
1566 case Intrinsic::riscv_seg5_store:
1567 case Intrinsic::riscv_seg6_store:
1568 case Intrinsic::riscv_seg7_store:
1569 case Intrinsic::riscv_seg8_store:
1570 // Operands are (vec, ..., vec, ptr, vl)
1571 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1572 /*IsStore*/ true,
1573 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1574 case Intrinsic::riscv_vle:
1575 case Intrinsic::riscv_vle_mask:
1576 case Intrinsic::riscv_vleff:
1577 case Intrinsic::riscv_vleff_mask:
1578 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1579 /*IsStore*/ false,
1580 /*IsUnitStrided*/ true,
1581 /*UsePtrVal*/ true);
1582 case Intrinsic::riscv_vse:
1583 case Intrinsic::riscv_vse_mask:
1584 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1585 /*IsStore*/ true,
1586 /*IsUnitStrided*/ true,
1587 /*UsePtrVal*/ true);
1588 case Intrinsic::riscv_vlse:
1589 case Intrinsic::riscv_vlse_mask:
1590 case Intrinsic::riscv_vloxei:
1591 case Intrinsic::riscv_vloxei_mask:
1592 case Intrinsic::riscv_vluxei:
1593 case Intrinsic::riscv_vluxei_mask:
1594 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1595 /*IsStore*/ false,
1596 /*IsUnitStrided*/ false);
1597 case Intrinsic::riscv_vsse:
1598 case Intrinsic::riscv_vsse_mask:
1599 case Intrinsic::riscv_vsoxei:
1600 case Intrinsic::riscv_vsoxei_mask:
1601 case Intrinsic::riscv_vsuxei:
1602 case Intrinsic::riscv_vsuxei_mask:
1603 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1604 /*IsStore*/ true,
1605 /*IsUnitStrided*/ false);
1606 case Intrinsic::riscv_vlseg2:
1607 case Intrinsic::riscv_vlseg3:
1608 case Intrinsic::riscv_vlseg4:
1609 case Intrinsic::riscv_vlseg5:
1610 case Intrinsic::riscv_vlseg6:
1611 case Intrinsic::riscv_vlseg7:
1612 case Intrinsic::riscv_vlseg8:
1613 case Intrinsic::riscv_vlseg2ff:
1614 case Intrinsic::riscv_vlseg3ff:
1615 case Intrinsic::riscv_vlseg4ff:
1616 case Intrinsic::riscv_vlseg5ff:
1617 case Intrinsic::riscv_vlseg6ff:
1618 case Intrinsic::riscv_vlseg7ff:
1619 case Intrinsic::riscv_vlseg8ff:
1620 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1621 /*IsStore*/ false,
1622 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1623 case Intrinsic::riscv_vlseg2_mask:
1624 case Intrinsic::riscv_vlseg3_mask:
1625 case Intrinsic::riscv_vlseg4_mask:
1626 case Intrinsic::riscv_vlseg5_mask:
1627 case Intrinsic::riscv_vlseg6_mask:
1628 case Intrinsic::riscv_vlseg7_mask:
1629 case Intrinsic::riscv_vlseg8_mask:
1630 case Intrinsic::riscv_vlseg2ff_mask:
1631 case Intrinsic::riscv_vlseg3ff_mask:
1632 case Intrinsic::riscv_vlseg4ff_mask:
1633 case Intrinsic::riscv_vlseg5ff_mask:
1634 case Intrinsic::riscv_vlseg6ff_mask:
1635 case Intrinsic::riscv_vlseg7ff_mask:
1636 case Intrinsic::riscv_vlseg8ff_mask:
1637 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1638 /*IsStore*/ false,
1639 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1640 case Intrinsic::riscv_vlsseg2:
1641 case Intrinsic::riscv_vlsseg3:
1642 case Intrinsic::riscv_vlsseg4:
1643 case Intrinsic::riscv_vlsseg5:
1644 case Intrinsic::riscv_vlsseg6:
1645 case Intrinsic::riscv_vlsseg7:
1646 case Intrinsic::riscv_vlsseg8:
1647 case Intrinsic::riscv_vloxseg2:
1648 case Intrinsic::riscv_vloxseg3:
1649 case Intrinsic::riscv_vloxseg4:
1650 case Intrinsic::riscv_vloxseg5:
1651 case Intrinsic::riscv_vloxseg6:
1652 case Intrinsic::riscv_vloxseg7:
1653 case Intrinsic::riscv_vloxseg8:
1654 case Intrinsic::riscv_vluxseg2:
1655 case Intrinsic::riscv_vluxseg3:
1656 case Intrinsic::riscv_vluxseg4:
1657 case Intrinsic::riscv_vluxseg5:
1658 case Intrinsic::riscv_vluxseg6:
1659 case Intrinsic::riscv_vluxseg7:
1660 case Intrinsic::riscv_vluxseg8:
1661 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1662 /*IsStore*/ false,
1663 /*IsUnitStrided*/ false);
1664 case Intrinsic::riscv_vlsseg2_mask:
1665 case Intrinsic::riscv_vlsseg3_mask:
1666 case Intrinsic::riscv_vlsseg4_mask:
1667 case Intrinsic::riscv_vlsseg5_mask:
1668 case Intrinsic::riscv_vlsseg6_mask:
1669 case Intrinsic::riscv_vlsseg7_mask:
1670 case Intrinsic::riscv_vlsseg8_mask:
1671 case Intrinsic::riscv_vloxseg2_mask:
1672 case Intrinsic::riscv_vloxseg3_mask:
1673 case Intrinsic::riscv_vloxseg4_mask:
1674 case Intrinsic::riscv_vloxseg5_mask:
1675 case Intrinsic::riscv_vloxseg6_mask:
1676 case Intrinsic::riscv_vloxseg7_mask:
1677 case Intrinsic::riscv_vloxseg8_mask:
1678 case Intrinsic::riscv_vluxseg2_mask:
1679 case Intrinsic::riscv_vluxseg3_mask:
1680 case Intrinsic::riscv_vluxseg4_mask:
1681 case Intrinsic::riscv_vluxseg5_mask:
1682 case Intrinsic::riscv_vluxseg6_mask:
1683 case Intrinsic::riscv_vluxseg7_mask:
1684 case Intrinsic::riscv_vluxseg8_mask:
1685 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1686 /*IsStore*/ false,
1687 /*IsUnitStrided*/ false);
1688 case Intrinsic::riscv_vsseg2:
1689 case Intrinsic::riscv_vsseg3:
1690 case Intrinsic::riscv_vsseg4:
1691 case Intrinsic::riscv_vsseg5:
1692 case Intrinsic::riscv_vsseg6:
1693 case Intrinsic::riscv_vsseg7:
1694 case Intrinsic::riscv_vsseg8:
1695 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1696 /*IsStore*/ true,
1697 /*IsUnitStrided*/ false);
1698 case Intrinsic::riscv_vsseg2_mask:
1699 case Intrinsic::riscv_vsseg3_mask:
1700 case Intrinsic::riscv_vsseg4_mask:
1701 case Intrinsic::riscv_vsseg5_mask:
1702 case Intrinsic::riscv_vsseg6_mask:
1703 case Intrinsic::riscv_vsseg7_mask:
1704 case Intrinsic::riscv_vsseg8_mask:
1705 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1706 /*IsStore*/ true,
1707 /*IsUnitStrided*/ false);
1708 case Intrinsic::riscv_vssseg2:
1709 case Intrinsic::riscv_vssseg3:
1710 case Intrinsic::riscv_vssseg4:
1711 case Intrinsic::riscv_vssseg5:
1712 case Intrinsic::riscv_vssseg6:
1713 case Intrinsic::riscv_vssseg7:
1714 case Intrinsic::riscv_vssseg8:
1715 case Intrinsic::riscv_vsoxseg2:
1716 case Intrinsic::riscv_vsoxseg3:
1717 case Intrinsic::riscv_vsoxseg4:
1718 case Intrinsic::riscv_vsoxseg5:
1719 case Intrinsic::riscv_vsoxseg6:
1720 case Intrinsic::riscv_vsoxseg7:
1721 case Intrinsic::riscv_vsoxseg8:
1722 case Intrinsic::riscv_vsuxseg2:
1723 case Intrinsic::riscv_vsuxseg3:
1724 case Intrinsic::riscv_vsuxseg4:
1725 case Intrinsic::riscv_vsuxseg5:
1726 case Intrinsic::riscv_vsuxseg6:
1727 case Intrinsic::riscv_vsuxseg7:
1728 case Intrinsic::riscv_vsuxseg8:
1729 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1730 /*IsStore*/ true,
1731 /*IsUnitStrided*/ false);
1732 case Intrinsic::riscv_vssseg2_mask:
1733 case Intrinsic::riscv_vssseg3_mask:
1734 case Intrinsic::riscv_vssseg4_mask:
1735 case Intrinsic::riscv_vssseg5_mask:
1736 case Intrinsic::riscv_vssseg6_mask:
1737 case Intrinsic::riscv_vssseg7_mask:
1738 case Intrinsic::riscv_vssseg8_mask:
1739 case Intrinsic::riscv_vsoxseg2_mask:
1740 case Intrinsic::riscv_vsoxseg3_mask:
1741 case Intrinsic::riscv_vsoxseg4_mask:
1742 case Intrinsic::riscv_vsoxseg5_mask:
1743 case Intrinsic::riscv_vsoxseg6_mask:
1744 case Intrinsic::riscv_vsoxseg7_mask:
1745 case Intrinsic::riscv_vsoxseg8_mask:
1746 case Intrinsic::riscv_vsuxseg2_mask:
1747 case Intrinsic::riscv_vsuxseg3_mask:
1748 case Intrinsic::riscv_vsuxseg4_mask:
1749 case Intrinsic::riscv_vsuxseg5_mask:
1750 case Intrinsic::riscv_vsuxseg6_mask:
1751 case Intrinsic::riscv_vsuxseg7_mask:
1752 case Intrinsic::riscv_vsuxseg8_mask:
1753 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1754 /*IsStore*/ true,
1755 /*IsUnitStrided*/ false);
1756 }
1757}
1758
1759bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1760 const AddrMode &AM, Type *Ty,
1761 unsigned AS,
1762 Instruction *I) const {
1763 // No global is ever allowed as a base.
1764 if (AM.BaseGV)
1765 return false;
1766
1767 // None of our addressing modes allows a scalable offset.
1768 if (AM.ScalableOffset)
1769 return false;
1770
1771 // RVV instructions only support register addressing.
1772 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1773 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1774
1775 // Require a 12-bit signed offset.
1776 if (!isInt<12>(AM.BaseOffs))
1777 return false;
1778
1779 switch (AM.Scale) {
1780 case 0: // "r+i" or just "i", depending on HasBaseReg.
1781 break;
1782 case 1:
1783 if (!AM.HasBaseReg) // allow "r+i".
1784 break;
1785 return false; // disallow "r+r" or "r+r+i".
1786 default:
1787 return false;
1788 }
1789
1790 return true;
1791}
1792
1794 return isInt<12>(Imm);
1795}
1796
1798 return isInt<12>(Imm);
1799}
1800
1801// On RV32, 64-bit integers are split into their high and low parts and held
1802// in two different registers, so the trunc is free since the low register can
1803// just be used.
1804// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1805// isTruncateFree?
1807 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1808 return false;
1809 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1810 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1811 return (SrcBits == 64 && DestBits == 32);
1812}
1813
1815 // We consider i64->i32 free on RV64 since we have good selection of W
1816 // instructions that make promoting operations back to i64 free in many cases.
1817 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1818 !DstVT.isInteger())
1819 return false;
1820 unsigned SrcBits = SrcVT.getSizeInBits();
1821 unsigned DestBits = DstVT.getSizeInBits();
1822 return (SrcBits == 64 && DestBits == 32);
1823}
1824
1826 EVT SrcVT = Val.getValueType();
1827 // Free truncate from vnsrl and vnsra.
1828 if (Subtarget.hasVInstructions() &&
1829 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1830 SrcVT.isVector() && VT2.isVector()) {
1831 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1832 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1833 if (SrcBits == DestBits * 2) {
1834 return true;
1835 }
1836 }
1837 return TargetLowering::isTruncateFree(Val, VT2);
1838}
1839
1841 // Zexts are free if they can be combined with a load.
1842 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1843 // poorly with type legalization of compares preferring sext.
1844 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1845 EVT MemVT = LD->getMemoryVT();
1846 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1847 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1848 LD->getExtensionType() == ISD::ZEXTLOAD))
1849 return true;
1850 }
1851
1852 return TargetLowering::isZExtFree(Val, VT2);
1853}
1854
1856 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1857}
1858
1860 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1861}
1862
1864 return Subtarget.hasStdExtZbb() ||
1865 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1866}
1867
1869 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1870 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1871}
1872
1874 const Instruction &AndI) const {
1875 // We expect to be able to match a bit extraction instruction if the Zbs
1876 // extension is supported and the mask is a power of two. However, we
1877 // conservatively return false if the mask would fit in an ANDI instruction,
1878 // on the basis that it's possible the sinking+duplication of the AND in
1879 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1880 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1881 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1882 return false;
1883 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1884 if (!Mask)
1885 return false;
1886 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1887}
1888
1890 EVT VT = Y.getValueType();
1891
1892 // FIXME: Support vectors once we have tests.
1893 if (VT.isVector())
1894 return false;
1895
1896 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1897 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
1898}
1899
1901 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1902 if (Subtarget.hasStdExtZbs())
1903 return X.getValueType().isScalarInteger();
1904 auto *C = dyn_cast<ConstantSDNode>(Y);
1905 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1906 if (Subtarget.hasVendorXTHeadBs())
1907 return C != nullptr;
1908 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
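// The mask (1 << Y) has to fit in ANDI's 12-bit signed immediate, which
// limits the testable bit position to 10.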
1909 return C && C->getAPIntValue().ule(10);
1910}
1911
1913 EVT VT) const {
1914 // Only enable for rvv.
1915 if (!VT.isVector() || !Subtarget.hasVInstructions())
1916 return false;
1917
1918 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1919 return false;
1920
1921 return true;
1922}
1923
1925 Type *Ty) const {
1926 assert(Ty->isIntegerTy());
1927
1928 unsigned BitSize = Ty->getIntegerBitWidth();
1929 if (BitSize > Subtarget.getXLen())
1930 return false;
1931
1932 // Fast path, assume 32-bit immediates are cheap.
1933 int64_t Val = Imm.getSExtValue();
1934 if (isInt<32>(Val))
1935 return true;
1936
1937 // A constant pool entry may be more aligned than the load we're trying to
1938 // replace. If we don't support unaligned scalar mem, prefer the constant
1939 // pool.
1940 // TODO: Can the caller pass down the alignment?
1941 if (!Subtarget.enableUnalignedScalarMem())
1942 return true;
1943
1944 // Prefer to keep the load if it would require many instructions.
1945 // This uses the same threshold we use for constant pools but doesn't
1946 // check useConstantPoolForLargeInts.
1947 // TODO: Should we keep the load only when we're definitely going to emit a
1948 // constant pool?
1949
1950 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
1951 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1952}
1953
1957 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1958 SelectionDAG &DAG) const {
1959 // One interesting pattern that we'd want to form is 'bit extract':
1960 // ((1 >> Y) & 1) ==/!= 0
1961 // But we also need to be careful not to try to reverse that fold.
1962
1963 // Is this '((1 >> Y) & 1)'?
1964 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1965 return false; // Keep the 'bit extract' pattern.
1966
1967 // Will this be '((1 >> Y) & 1)' after the transform?
1968 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1969 return true; // Do form the 'bit extract' pattern.
1970
1971 // If 'X' is a constant, and we transform, then we will immediately
1972 // try to undo the fold, thus causing endless combine loop.
1973 // So only do the transform if X is not a constant. This matches the default
1974 // implementation of this function.
1975 return !XC;
1976}
1977
1978bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1979 switch (Opcode) {
1980 case Instruction::Add:
1981 case Instruction::Sub:
1982 case Instruction::Mul:
1983 case Instruction::And:
1984 case Instruction::Or:
1985 case Instruction::Xor:
1986 case Instruction::FAdd:
1987 case Instruction::FSub:
1988 case Instruction::FMul:
1989 case Instruction::FDiv:
1990 case Instruction::ICmp:
1991 case Instruction::FCmp:
1992 return true;
1993 case Instruction::Shl:
1994 case Instruction::LShr:
1995 case Instruction::AShr:
1996 case Instruction::UDiv:
1997 case Instruction::SDiv:
1998 case Instruction::URem:
1999 case Instruction::SRem:
2000 case Instruction::Select:
2001 return Operand == 1;
2002 default:
2003 return false;
2004 }
2005}
2006
2007
2009 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2010 return false;
2011
2012 if (canSplatOperand(I->getOpcode(), Operand))
2013 return true;
2014
2015 auto *II = dyn_cast<IntrinsicInst>(I);
2016 if (!II)
2017 return false;
2018
2019 switch (II->getIntrinsicID()) {
2020 case Intrinsic::fma:
2021 case Intrinsic::vp_fma:
2022 return Operand == 0 || Operand == 1;
2023 case Intrinsic::vp_shl:
2024 case Intrinsic::vp_lshr:
2025 case Intrinsic::vp_ashr:
2026 case Intrinsic::vp_udiv:
2027 case Intrinsic::vp_sdiv:
2028 case Intrinsic::vp_urem:
2029 case Intrinsic::vp_srem:
2030 case Intrinsic::ssub_sat:
2031 case Intrinsic::vp_ssub_sat:
2032 case Intrinsic::usub_sat:
2033 case Intrinsic::vp_usub_sat:
2034 return Operand == 1;
2035 // These intrinsics are commutative.
2036 case Intrinsic::vp_add:
2037 case Intrinsic::vp_mul:
2038 case Intrinsic::vp_and:
2039 case Intrinsic::vp_or:
2040 case Intrinsic::vp_xor:
2041 case Intrinsic::vp_fadd:
2042 case Intrinsic::vp_fmul:
2043 case Intrinsic::vp_icmp:
2044 case Intrinsic::vp_fcmp:
2045 case Intrinsic::smin:
2046 case Intrinsic::vp_smin:
2047 case Intrinsic::umin:
2048 case Intrinsic::vp_umin:
2049 case Intrinsic::smax:
2050 case Intrinsic::vp_smax:
2051 case Intrinsic::umax:
2052 case Intrinsic::vp_umax:
2053 case Intrinsic::sadd_sat:
2054 case Intrinsic::vp_sadd_sat:
2055 case Intrinsic::uadd_sat:
2056 case Intrinsic::vp_uadd_sat:
2057 // These intrinsics have 'vr' versions.
2058 case Intrinsic::vp_sub:
2059 case Intrinsic::vp_fsub:
2060 case Intrinsic::vp_fdiv:
2061 return Operand == 0 || Operand == 1;
2062 default:
2063 return false;
2064 }
2065}
2066
2067/// Check if sinking \p I's operands to I's basic block is profitable, because
2068/// the operands can be folded into a target instruction, e.g.
2069/// splats of scalars can fold into vector instructions.
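/// For example, a shufflevector splat of a scalar feeding a vector multiply
/// can be sunk next to its user so that isel can fold it into, e.g., vmul.vx.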
2071 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2072 using namespace llvm::PatternMatch;
2073
2074 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2075 return false;
2076
2077 // Don't sink splat operands if the target prefers not to. Some targets require
2078 // S2V transfer buffers and we can run out of them copying the same value
2079 // repeatedly.
2080 // FIXME: It could still be worth doing if it would improve vector register
2081 // pressure and prevent a vector spill.
2082 if (!Subtarget.sinkSplatOperands())
2083 return false;
2084
2085 for (auto OpIdx : enumerate(I->operands())) {
2086 if (!canSplatOperand(I, OpIdx.index()))
2087 continue;
2088
2089 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2090 // Make sure we are not already sinking this operand
2091 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2092 continue;
2093
2094 // We are looking for a splat that can be sunk.
2095 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
2096 m_Undef(), m_ZeroMask())))
2097 continue;
2098
2099 // Don't sink i1 splats.
2100 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2101 continue;
2102
2103 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2104 // and vector registers.
2105 for (Use &U : Op->uses()) {
2106 Instruction *Insn = cast<Instruction>(U.getUser());
2107 if (!canSplatOperand(Insn, U.getOperandNo()))
2108 return false;
2109 }
2110
2111 Ops.push_back(&Op->getOperandUse(0));
2112 Ops.push_back(&OpIdx.value());
2113 }
2114 return true;
2115}
2116
2118 unsigned Opc = VecOp.getOpcode();
2119
2120 // Assume target opcodes can't be scalarized.
2121 // TODO - do we have any exceptions?
2122 if (Opc >= ISD::BUILTIN_OP_END)
2123 return false;
2124
2125 // If the vector op is not supported, try to convert to scalar.
2126 EVT VecVT = VecOp.getValueType();
2127 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2128 return true;
2129
2130 // If the vector op is supported, but the scalar op is not, the transform may
2131 // not be worthwhile.
2132 // Permit a vector binary operation to be converted to a scalar binary
2133 // operation that is custom lowered with an illegal type.
2134 EVT ScalarVT = VecVT.getScalarType();
2135 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2136 isOperationCustom(Opc, ScalarVT);
2137}
2138
2140 const GlobalAddressSDNode *GA) const {
2141 // In order to maximise the opportunity for common subexpression elimination,
2142 // keep a separate ADD node for the global address offset instead of folding
2143 // it in the global address node. Later peephole optimisations may choose to
2144 // fold it back in when profitable.
2145 return false;
2146}
2147
2148// Return one of the followings:
2149// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2150// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2151// positive counterpart, which will be materialized from the first returned
2152 // element. The second returned element indicates that an FNEG should be
2153 // emitted afterwards.
2154// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2155std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2156 EVT VT) const {
2157 if (!Subtarget.hasStdExtZfa())
2158 return std::make_pair(-1, false);
2159
2160 bool IsSupportedVT = false;
2161 if (VT == MVT::f16) {
2162 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2163 } else if (VT == MVT::f32) {
2164 IsSupportedVT = true;
2165 } else if (VT == MVT::f64) {
2166 assert(Subtarget.hasStdExtD() && "Expect D extension");
2167 IsSupportedVT = true;
2168 }
2169
2170 if (!IsSupportedVT)
2171 return std::make_pair(-1, false);
2172
2174 if (Index < 0 && Imm.isNegative())
2175 // Try the combination of its positive counterpart + FNEG.
2176 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2177 else
2178 return std::make_pair(Index, false);
2179}
2180
2182 bool ForCodeSize) const {
2183 bool IsLegalVT = false;
2184 if (VT == MVT::f16)
2185 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2186 else if (VT == MVT::f32)
2187 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2188 else if (VT == MVT::f64)
2189 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2190 else if (VT == MVT::bf16)
2191 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2192
2193 if (!IsLegalVT)
2194 return false;
2195
2196 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2197 return true;
2198
2199 // Cannot create a 64 bit floating-point immediate value for rv32.
2200 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2201 // td can handle +0.0 or -0.0 already.
2202 // -0.0 can be created by fmv + fneg.
2203 return Imm.isZero();
2204 }
2205
2206 // Special case: fmv + fneg
2207 if (Imm.isNegZero())
2208 return true;
2209
2210 // Building an integer and then converting requires a fmv at the end of
2211 // the integer sequence.
2212 const int Cost =
2213 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2214 Subtarget);
2215 return Cost <= FPImmCost;
2216}
2217
2218// TODO: This is very conservative.
2219 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2220 unsigned Index) const {
2221 if (!Subtarget.hasVInstructions())
2222 return false;
2223
2224 // Only support extracting a fixed from a fixed vector for now.
2225 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2226 return false;
2227
2228 EVT EltVT = ResVT.getVectorElementType();
2229 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2230
2231 // The smallest type we can slide is i8.
2232 // TODO: We can extract index 0 from a mask vector without a slide.
2233 if (EltVT == MVT::i1)
2234 return false;
2235
2236 unsigned ResElts = ResVT.getVectorNumElements();
2237 unsigned SrcElts = SrcVT.getVectorNumElements();
2238
2239 unsigned MinVLen = Subtarget.getRealMinVLen();
2240 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2241
2242 // If we're extracting only data from the first VLEN bits of the source
2243 // then we can always do this with an m1 vslidedown.vx. Restricting the
2244 // Index ensures we can use a vslidedown.vi.
2245 // TODO: We can generalize this when the exact VLEN is known.
2246 if (Index + ResElts <= MinVLMAX && Index < 31)
2247 return true;
2248
2249 // Conservatively only handle extracting half of a vector.
2250 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2251 // a cheap extract. However, this case is important in practice for
2252 // shuffled extracts of longer vectors. How should we resolve this?
2253 if ((ResElts * 2) != SrcElts)
2254 return false;
2255
2256 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2257 // cheap.
2258 if (Index >= 32)
2259 return false;
2260
2261 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2262 // the upper half of a vector until we have more test coverage.
2263 return Index == 0 || Index == ResElts;
2264}
2265
2266 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2267 CallingConv::ID CC,
2268 EVT VT) const {
2269 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2270 // We might still end up using a GPR but that will be decided based on ABI.
2271 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2272 !Subtarget.hasStdExtZfhminOrZhinxmin())
2273 return MVT::f32;
2274
2275 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2276
2277 return PartVT;
2278}
2279
2280 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(
2281 LLVMContext &Context, CallingConv::ID CC,
2282 EVT VT) const {
2283 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2284 // We might still end up using a GPR but that will be decided based on ABI.
2285 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2286 !Subtarget.hasStdExtZfhminOrZhinxmin())
2287 return 1;
2288
2289 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2290}
2291
2292 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2293 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2294 unsigned &NumIntermediates, MVT &RegisterVT) const {
2295 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2296 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2297
2298 return NumRegs;
2299}
2300
2301// Changes the condition code and swaps operands if necessary, so the SetCC
2302// operation matches one of the comparisons supported directly by branches
2303// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2304// with 1/-1.
2305static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2306 ISD::CondCode &CC, SelectionDAG &DAG) {
2307 // If this is a single bit test that can't be handled by ANDI, shift the
2308 // bit to be tested to the MSB and perform a signed compare with 0.
2309 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2310 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2311 isa<ConstantSDNode>(LHS.getOperand(1))) {
2312 uint64_t Mask = LHS.getConstantOperandVal(1);
2313 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2314 unsigned ShAmt = 0;
2315 if (isPowerOf2_64(Mask)) {
2316 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2317 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2318 } else {
2319 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2320 }
2321
2322 LHS = LHS.getOperand(0);
2323 if (ShAmt != 0)
2324 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2325 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2326 return;
2327 }
2328 }
2329
2330 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2331 int64_t C = RHSC->getSExtValue();
2332 switch (CC) {
2333 default: break;
2334 case ISD::SETGT:
2335 // Convert X > -1 to X >= 0.
2336 if (C == -1) {
2337 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2338 CC = ISD::SETGE;
2339 return;
2340 }
2341 break;
2342 case ISD::SETLT:
2343 // Convert X < 1 to 0 >= X.
2344 if (C == 1) {
2345 RHS = LHS;
2346 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2347 CC = ISD::SETGE;
2348 return;
2349 }
2350 break;
2351 }
2352 }
2353
2354 switch (CC) {
2355 default:
2356 break;
2357 case ISD::SETGT:
2358 case ISD::SETLE:
2359 case ISD::SETUGT:
2360 case ISD::SETULE:
2361 CC = ISD::getSetCCSwappedOperands(CC);
2362 std::swap(LHS, RHS);
2363 break;
2364 }
2365}
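// Illustrative rewrites performed above (inputs assumed):
//   setne (and X, 0x1000), 0  ->  setlt (shl X, XLEN-13), 0   ; single-bit test via sign bit
//   setgt X, -1               ->  setge X, 0
//   setlt X, 1                ->  setge 0, X
//   setgt X, Y                ->  setlt Y, X                  ; swapped to a branch-supported CC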
2366
2367 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2368 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2369 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2370 if (VT.getVectorElementType() == MVT::i1)
2371 KnownSize *= 8;
2372
2373 switch (KnownSize) {
2374 default:
2375 llvm_unreachable("Invalid LMUL.");
2376 case 8:
2377 return RISCVII::VLMUL::LMUL_F8;
2378 case 16:
2379 return RISCVII::VLMUL::LMUL_F4;
2380 case 32:
2381 return RISCVII::VLMUL::LMUL_F2;
2382 case 64:
2383 return RISCVII::VLMUL::LMUL_1;
2384 case 128:
2385 return RISCVII::VLMUL::LMUL_2;
2386 case 256:
2387 return RISCVII::VLMUL::LMUL_4;
2388 case 512:
2389 return RISCVII::VLMUL::LMUL_8;
2390 }
2391}
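// Illustrative mapping (RVVBitsPerBlock = 64 assumed): nxv1i8 has 8 known-min
// bits -> LMUL_F8, nxv8i8 and nxv2i32 have 64 -> LMUL_1, nxv4i32 has 128 ->
// LMUL_2, and an i1 mask such as nxv64i1 (64 bits, scaled by 8 above) -> LMUL_8.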
2392
2393 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2394 switch (LMul) {
2395 default:
2396 llvm_unreachable("Invalid LMUL.");
2397 case RISCVII::VLMUL::LMUL_F8:
2398 case RISCVII::VLMUL::LMUL_F4:
2399 case RISCVII::VLMUL::LMUL_F2:
2400 case RISCVII::VLMUL::LMUL_1:
2401 return RISCV::VRRegClassID;
2402 case RISCVII::VLMUL::LMUL_2:
2403 return RISCV::VRM2RegClassID;
2404 case RISCVII::VLMUL::LMUL_4:
2405 return RISCV::VRM4RegClassID;
2406 case RISCVII::VLMUL::LMUL_8:
2407 return RISCV::VRM8RegClassID;
2408 }
2409}
2410
2411 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2412 RISCVII::VLMUL LMUL = getLMUL(VT);
2413 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2414 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2415 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2416 LMUL == RISCVII::VLMUL::LMUL_1) {
2417 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2418 "Unexpected subreg numbering");
2419 return RISCV::sub_vrm1_0 + Index;
2420 }
2421 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2422 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2423 "Unexpected subreg numbering");
2424 return RISCV::sub_vrm2_0 + Index;
2425 }
2426 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2427 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm4_0 + Index;
2430 }
2431 llvm_unreachable("Invalid vector type.");
2432}
2433
2434 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2435 if (VT.getVectorElementType() == MVT::i1)
2436 return RISCV::VRRegClassID;
2437 return getRegClassIDForLMUL(getLMUL(VT));
2438}
2439
2440// Attempt to decompose a subvector insert/extract between VecVT and
2441// SubVecVT via subregister indices. Returns the subregister index that
2442// can perform the subvector insert/extract with the given element index, as
2443// well as the index corresponding to any leftover subvectors that must be
2444// further inserted/extracted within the register class for SubVecVT.
2445std::pair<unsigned, unsigned>
2446 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2447 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2448 const RISCVRegisterInfo *TRI) {
2449 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2450 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2451 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2452 "Register classes not ordered");
2453 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2454 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2455 // Try to compose a subregister index that takes us from the incoming
2456 // LMUL>1 register class down to the outgoing one. At each step we halve
2457 // the LMUL:
2458 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2459 // Note that this is not guaranteed to find a subregister index, such as
2460 // when we are extracting from one VR type to another.
2461 unsigned SubRegIdx = RISCV::NoSubRegister;
2462 for (const unsigned RCID :
2463 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2464 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2465 VecVT = VecVT.getHalfNumVectorElementsVT();
2466 bool IsHi =
2467 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2468 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2469 getSubregIndexByMVT(VecVT, IsHi));
2470 if (IsHi)
2471 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2472 }
2473 return {SubRegIdx, InsertExtractIdx};
2474}
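// Worked example of the loop above (illustrative): extracting nxv2i32 at
// element 12 from nxv16i32 (LMUL=8). Halving to nxv8i32 selects the high half
// (sub_vrm4_1, index becomes 4), halving to nxv4i32 selects the high half
// again (sub_vrm2_1, index becomes 0), and halving to nxv2i32 selects the low
// half (sub_vrm1_0), giving the composed subregister index from the comment
// above and a leftover element index of 0.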
2475
2476// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2477// stores for those types.
2478bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2479 return !Subtarget.useRVVForFixedLengthVectors() ||
2480 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2481}
2482
2483 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2484 if (!ScalarTy.isSimple())
2485 return false;
2486 switch (ScalarTy.getSimpleVT().SimpleTy) {
2487 case MVT::iPTR:
2488 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2489 case MVT::i8:
2490 case MVT::i16:
2491 case MVT::i32:
2492 return true;
2493 case MVT::i64:
2494 return Subtarget.hasVInstructionsI64();
2495 case MVT::f16:
2496 return Subtarget.hasVInstructionsF16();
2497 case MVT::f32:
2498 return Subtarget.hasVInstructionsF32();
2499 case MVT::f64:
2500 return Subtarget.hasVInstructionsF64();
2501 default:
2502 return false;
2503 }
2504}
2505
2506
2507unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2508 return NumRepeatedDivisors;
2509}
2510
2511 static SDValue getVLOperand(SDValue Op) {
2512 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2513 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2514 "Unexpected opcode");
2515 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2516 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2517 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2518 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2519 if (!II)
2520 return SDValue();
2521 return Op.getOperand(II->VLOperand + 1 + HasChain);
2522}
2523
2524 static bool useRVVForFixedLengthVectorVT(MVT VT,
2525 const RISCVSubtarget &Subtarget) {
2526 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2527 if (!Subtarget.useRVVForFixedLengthVectors())
2528 return false;
2529
2530 // We only support a set of vector types with a consistent maximum fixed size
2531 // across all supported vector element types to avoid legalization issues.
2532 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2533 // fixed-length vector type we support is 1024 bytes.
2534 if (VT.getFixedSizeInBits() > 1024 * 8)
2535 return false;
2536
2537 unsigned MinVLen = Subtarget.getRealMinVLen();
2538
2539 MVT EltVT = VT.getVectorElementType();
2540
2541 // Don't use RVV for vectors we cannot scalarize if required.
2542 switch (EltVT.SimpleTy) {
2543 // i1 is supported but has different rules.
2544 default:
2545 return false;
2546 case MVT::i1:
2547 // Masks can only use a single register.
2548 if (VT.getVectorNumElements() > MinVLen)
2549 return false;
2550 MinVLen /= 8;
2551 break;
2552 case MVT::i8:
2553 case MVT::i16:
2554 case MVT::i32:
2555 break;
2556 case MVT::i64:
2557 if (!Subtarget.hasVInstructionsI64())
2558 return false;
2559 break;
2560 case MVT::f16:
2561 if (!Subtarget.hasVInstructionsF16Minimal())
2562 return false;
2563 break;
2564 case MVT::bf16:
2565 if (!Subtarget.hasVInstructionsBF16Minimal())
2566 return false;
2567 break;
2568 case MVT::f32:
2569 if (!Subtarget.hasVInstructionsF32())
2570 return false;
2571 break;
2572 case MVT::f64:
2573 if (!Subtarget.hasVInstructionsF64())
2574 return false;
2575 break;
2576 }
2577
2578 // Reject elements larger than ELEN.
2579 if (EltVT.getSizeInBits() > Subtarget.getELen())
2580 return false;
2581
2582 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2583 // Don't use RVV for types that don't fit.
2584 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2585 return false;
2586
2587 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2588 // the base fixed length RVV support in place.
2589 if (!VT.isPow2VectorType())
2590 return false;
2591
2592 return true;
2593}
2594
2595bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2596 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2597}
2598
2599// Return the largest legal scalable vector type that matches VT's element type.
2600 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2601 const RISCVSubtarget &Subtarget) {
2602 // This may be called before legal types are setup.
2603 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2604 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2605 "Expected legal fixed length vector!");
2606
2607 unsigned MinVLen = Subtarget.getRealMinVLen();
2608 unsigned MaxELen = Subtarget.getELen();
2609
2610 MVT EltVT = VT.getVectorElementType();
2611 switch (EltVT.SimpleTy) {
2612 default:
2613 llvm_unreachable("unexpected element type for RVV container");
2614 case MVT::i1:
2615 case MVT::i8:
2616 case MVT::i16:
2617 case MVT::i32:
2618 case MVT::i64:
2619 case MVT::bf16:
2620 case MVT::f16:
2621 case MVT::f32:
2622 case MVT::f64: {
2623 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2624 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2625 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2626 unsigned NumElts =
2627 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2628 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2629 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2630 return MVT::getScalableVectorVT(EltVT, NumElts);
2631 }
2632 }
2633}
2634
2635 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2636 const RISCVSubtarget &Subtarget) {
2637 return ::getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2638 Subtarget);
2639}
2640
2641 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2642 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2643}
2644
2645// Grow V to consume an entire RVV register.
2646 static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2647 const RISCVSubtarget &Subtarget) {
2648 assert(VT.isScalableVector() &&
2649 "Expected to convert into a scalable vector!");
2650 assert(V.getValueType().isFixedLengthVector() &&
2651 "Expected a fixed length vector operand!");
2652 SDLoc DL(V);
2653 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2654 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2655}
2656
2657// Shrink V so it's just big enough to maintain a VT's worth of data.
2658 static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2659 const RISCVSubtarget &Subtarget) {
2660 assert(VT.isFixedLengthVector() &&
2661 "Expected to convert into a fixed length vector!");
2662 assert(V.getValueType().isScalableVector() &&
2663 "Expected a scalable vector operand!");
2664 SDLoc DL(V);
2665 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2666 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2667}
2668
2669/// Return the mask type suitable for masking the provided vector type.
2670/// This is simply an i1 element type vector of the same (possibly scalable)
2671/// length.
2672static MVT getMaskTypeFor(MVT VecVT) {
2673 assert(VecVT.isVector());
2674 ElementCount EC = VecVT.getVectorElementCount();
2675 return MVT::getVectorVT(MVT::i1, EC);
2676}
2677
2678/// Creates an all ones mask suitable for masking a vector of type VecTy with
2679/// vector length VL.
2680static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2681 SelectionDAG &DAG) {
2682 MVT MaskVT = getMaskTypeFor(VecVT);
2683 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2684}
2685
2686static std::pair<SDValue, SDValue>
2687 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2688 const RISCVSubtarget &Subtarget) {
2689 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2690 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2691 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2692 return {Mask, VL};
2693}
2694
2695static std::pair<SDValue, SDValue>
2696getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2697 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2698 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2699 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2700 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2701 return {Mask, VL};
2702}
2703
2704// Gets the two common "VL" operands: an all-ones mask and the vector length.
2705// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2706// the vector type that the fixed-length vector is contained in. Otherwise if
2707// VecVT is scalable, then ContainerVT should be the same as VecVT.
2708static std::pair<SDValue, SDValue>
2709getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2710 const RISCVSubtarget &Subtarget) {
2711 if (VecVT.isFixedLengthVector())
2712 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2713 Subtarget);
2714 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2715 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2716}
2717
2718 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2719 SelectionDAG &DAG) const {
2720 assert(VecVT.isScalableVector() && "Expected scalable vector");
2721 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2722 VecVT.getVectorElementCount());
2723}
2724
2725std::pair<unsigned, unsigned>
2726 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2727 const RISCVSubtarget &Subtarget) {
2728 assert(VecVT.isScalableVector() && "Expected scalable vector");
2729
2730 unsigned EltSize = VecVT.getScalarSizeInBits();
2731 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2732
2733 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2734 unsigned MaxVLMAX =
2735 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2736
2737 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2738 unsigned MinVLMAX =
2739 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2740
2741 return std::make_pair(MinVLMAX, MaxVLMAX);
2742}
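// Illustrative example (VLEN bounds assumed): for nxv4i32, EltSize = 32 and
// MinSize = 128 (LMUL = 2), so with RealMinVLen = 128 and RealMaxVLen = 512
// this returns {MinVLMAX, MaxVLMAX} = {8, 32}, i.e. VLMAX = LMUL * VLEN / SEW.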
2743
2744// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
2745// little of either is (currently) supported. This can get us into an infinite loop
2746// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2747// as a ..., etc.
2748// Until either (or both) of these can reliably lower any node, reporting that
2749// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2750// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2751// which is not desirable.
2752 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2753 EVT VT, unsigned DefinedValues) const {
2754 return false;
2755}
2756
2757 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2758 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2759 // implementation-defined.
2760 if (!VT.isVector())
2761 return InstructionCost::getInvalid();
2762 unsigned DLenFactor = Subtarget.getDLenFactor();
2763 unsigned Cost;
2764 if (VT.isScalableVector()) {
2765 unsigned LMul;
2766 bool Fractional;
2767 std::tie(LMul, Fractional) =
2768 RISCVVType::decodeVLMUL(getLMUL(VT));
2769 if (Fractional)
2770 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2771 else
2772 Cost = (LMul * DLenFactor);
2773 } else {
2774 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2775 }
2776 return Cost;
2777}
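// Illustrative costs from the model above (DLEN = VLEN/2 assumed, so
// DLenFactor = 2): an LMUL=4 type costs 4 * 2 = 8, an LMUL=1 type costs 2, and
// a fractional LMUL=1/2 type costs DLenFactor / 2 = 1.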
2778
2779
2780/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2781/// is generally quadratic in the number of vregs implied by LMUL. Note that
2782/// the operands (index and possibly mask) are handled separately.
2783 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2784 return getLMULCost(VT) * getLMULCost(VT);
2785}
2786
2787/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2788/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2789/// or may track the vrgather.vv cost. It is implementation-dependent.
2790 InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2791 return getLMULCost(VT);
2792}
2793
2794/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2795/// for the type VT. (This does not cover the vslide1up or vslide1down
2796/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2797/// or may track the vrgather.vv cost. It is implementation-dependent.
2798 InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2799 return getLMULCost(VT);
2800}
2801
2802/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2803/// for the type VT. (This does not cover the vslide1up or vslide1down
2804/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2805/// or may track the vrgather.vv cost. It is implementation-dependent.
2806 InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2807 return getLMULCost(VT);
2808}
2809
2810 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2811 const RISCVSubtarget &Subtarget) {
2812 // RISC-V FP-to-int conversions saturate to the destination register size, but
2813 // don't produce 0 for nan. We can use a conversion instruction and fix the
2814 // nan case with a compare and a select.
2815 SDValue Src = Op.getOperand(0);
2816
2817 MVT DstVT = Op.getSimpleValueType();
2818 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2819
2820 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2821
2822 if (!DstVT.isVector()) {
2823 // For bf16, or for f16 in the absence of Zfh, promote to f32, then
2824 // saturate the result.
2825 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2826 Src.getValueType() == MVT::bf16) {
2827 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2828 }
2829
2830 unsigned Opc;
2831 if (SatVT == DstVT)
2832 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2833 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2834 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2835 else
2836 return SDValue();
2837 // FIXME: Support other SatVTs by clamping before or after the conversion.
2838
2839 SDLoc DL(Op);
2840 SDValue FpToInt = DAG.getNode(
2841 Opc, DL, DstVT, Src,
2842 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2843
2844 if (Opc == RISCVISD::FCVT_WU_RV64)
2845 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2846
2847 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2848 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2849 ISD::CondCode::SETUO);
2850 }
2851
2852 // Vectors.
2853
2854 MVT DstEltVT = DstVT.getVectorElementType();
2855 MVT SrcVT = Src.getSimpleValueType();
2856 MVT SrcEltVT = SrcVT.getVectorElementType();
2857 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2858 unsigned DstEltSize = DstEltVT.getSizeInBits();
2859
2860 // Only handle saturating to the destination type.
2861 if (SatVT != DstEltVT)
2862 return SDValue();
2863
2864 MVT DstContainerVT = DstVT;
2865 MVT SrcContainerVT = SrcVT;
2866 if (DstVT.isFixedLengthVector()) {
2867 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2868 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2869 assert(DstContainerVT.getVectorElementCount() ==
2870 SrcContainerVT.getVectorElementCount() &&
2871 "Expected same element count");
2872 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2873 }
2874
2875 SDLoc DL(Op);
2876
2877 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2878
2879 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2880 {Src, Src, DAG.getCondCode(ISD::SETNE),
2881 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2882
2883 // Need to widen by more than 1 step, promote the FP type, then do a widening
2884 // convert.
2885 if (DstEltSize > (2 * SrcEltSize)) {
2886 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2887 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2888 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2889 }
2890
2891 MVT CvtContainerVT = DstContainerVT;
2892 MVT CvtEltVT = DstEltVT;
2893 if (SrcEltSize > (2 * DstEltSize)) {
2894 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2895 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2896 }
2897
2898 unsigned RVVOpc =
2899 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2900 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2901
2902 while (CvtContainerVT != DstContainerVT) {
2903 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2904 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2905 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2906 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2907 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
2908 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2909 }
2910
2911 SDValue SplatZero = DAG.getNode(
2912 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2913 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2914 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2915 Res, DAG.getUNDEF(DstContainerVT), VL);
2916
2917 if (DstVT.isFixedLengthVector())
2918 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2919
2920 return Res;
2921}
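// Illustrative scalar case of the lowering above (assumed): an
// fptosi.sat.i32.f32 becomes an FCVT with RTZ rounding, followed by a compare
// of Src with itself and a select, so a NaN input yields 0 while out-of-range
// inputs keep the saturated FCVT result.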
2922
2923 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2924 switch (Opc) {
2925 case ISD::FROUNDEVEN:
2926 case ISD::STRICT_FROUNDEVEN:
2927 case ISD::VP_FROUNDEVEN:
2928 return RISCVFPRndMode::RNE;
2929 case ISD::FTRUNC:
2930 case ISD::STRICT_FTRUNC:
2931 case ISD::VP_FROUNDTOZERO:
2932 return RISCVFPRndMode::RTZ;
2933 case ISD::FFLOOR:
2934 case ISD::STRICT_FFLOOR:
2935 case ISD::VP_FFLOOR:
2936 return RISCVFPRndMode::RDN;
2937 case ISD::FCEIL:
2938 case ISD::STRICT_FCEIL:
2939 case ISD::VP_FCEIL:
2940 return RISCVFPRndMode::RUP;
2941 case ISD::FROUND:
2942 case ISD::STRICT_FROUND:
2943 case ISD::VP_FROUND:
2944 return RISCVFPRndMode::RMM;
2945 case ISD::FRINT:
2946 return RISCVFPRndMode::DYN;
2947 }
2948
2949 return RISCVFPRndMode::Invalid;
2950}
2951
2952// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2953// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2954// the integer domain and back. Taking care to avoid converting values that are
2955// nan or already correct.
2956static SDValue
2957 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2958 const RISCVSubtarget &Subtarget) {
2959 MVT VT = Op.getSimpleValueType();
2960 assert(VT.isVector() && "Unexpected type");
2961
2962 SDLoc DL(Op);
2963
2964 SDValue Src = Op.getOperand(0);
2965
2966 MVT ContainerVT = VT;
2967 if (VT.isFixedLengthVector()) {
2968 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2969 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2970 }
2971
2972 SDValue Mask, VL;
2973 if (Op->isVPOpcode()) {
2974 Mask = Op.getOperand(1);
2975 if (VT.isFixedLengthVector())
2976 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2977 Subtarget);
2978 VL = Op.getOperand(2);
2979 } else {
2980 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2981 }
2982
2983 // Freeze the source since we are increasing the number of uses.
2984 Src = DAG.getFreeze(Src);
2985
2986 // We do the conversion on the absolute value and fix the sign at the end.
2987 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2988
2989 // Determine the largest integer that can be represented exactly. This and
2990 // values larger than it don't have any fractional bits so don't need to
2991 // be converted.
2992 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
2993 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2994 APFloat MaxVal = APFloat(FltSem);
2995 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2996 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2997 SDValue MaxValNode =
2998 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2999 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3000 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3001
3002 // If abs(Src) was larger than MaxVal or nan, keep it.
3003 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3004 Mask =
3005 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3006 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3007 Mask, Mask, VL});
3008
3009 // Truncate to integer and convert back to FP.
3010 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3011 MVT XLenVT = Subtarget.getXLenVT();
3012 SDValue Truncated;
3013
3014 switch (Op.getOpcode()) {
3015 default:
3016 llvm_unreachable("Unexpected opcode");
3017 case ISD::FCEIL:
3018 case ISD::VP_FCEIL:
3019 case ISD::FFLOOR:
3020 case ISD::VP_FFLOOR:
3021 case ISD::FROUND:
3022 case ISD::FROUNDEVEN:
3023 case ISD::VP_FROUND:
3024 case ISD::VP_FROUNDEVEN:
3025 case ISD::VP_FROUNDTOZERO: {
3026 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3027 assert(FRM != RISCVFPRndMode::Invalid);
3028 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3029 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3030 break;
3031 }
3032 case ISD::FTRUNC:
3033 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3034 Mask, VL);
3035 break;
3036 case ISD::FRINT:
3037 case ISD::VP_FRINT:
3038 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3039 break;
3040 case ISD::FNEARBYINT:
3041 case ISD::VP_FNEARBYINT:
3042 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3043 Mask, VL);
3044 break;
3045 }
3046
3047 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3048 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3049 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3050 Mask, VL);
3051
3052 // Restore the original sign so that -0.0 is preserved.
3053 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3054 Src, Src, Mask, VL);
3055
3056 if (!VT.isFixedLengthVector())
3057 return Truncated;
3058
3059 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3060}
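// Illustrative walk-through of the expansion above for a v4f32 FFLOOR
// (assumed): lanes with |x| >= 2^23, or NaN, have no fractional bits and are
// masked out of the conversion; the remaining lanes are converted to integer
// with RDN rounding and back to FP, and FCOPYSIGN restores the original sign
// so that -0.0 is preserved.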
3061
3062// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3063// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to
3064// qNaN and converting the new source to integer and back to FP.
3065static SDValue
3066 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3067 const RISCVSubtarget &Subtarget) {
3068 SDLoc DL(Op);
3069 MVT VT = Op.getSimpleValueType();
3070 SDValue Chain = Op.getOperand(0);
3071 SDValue Src = Op.getOperand(1);
3072
3073 MVT ContainerVT = VT;
3074 if (VT.isFixedLengthVector()) {
3075 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3076 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3077 }
3078
3079 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3080
3081 // Freeze the source since we are increasing the number of uses.
3082 Src = DAG.getFreeze(Src);
3083
3084 // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
3085 MVT MaskVT = Mask.getSimpleValueType();
3086 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3087 DAG.getVTList(MaskVT, MVT::Other),
3088 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3089 DAG.getUNDEF(MaskVT), Mask, VL});
3090 Chain = Unorder.getValue(1);
3091 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3092 DAG.getVTList(ContainerVT, MVT::Other),
3093 {Chain, Src, Src, Src, Unorder, VL});
3094 Chain = Src.getValue(1);
3095
3096 // We do the conversion on the absolute value and fix the sign at the end.
3097 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3098
3099 // Determine the largest integer that can be represented exactly. This and
3100 // values larger than it don't have any fractional bits so don't need to
3101 // be converted.
3102 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3103 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3104 APFloat MaxVal = APFloat(FltSem);
3105 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3106 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3107 SDValue MaxValNode =
3108 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3109 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3110 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3111
3112 // If abs(Src) was larger than MaxVal or nan, keep it.
3113 Mask = DAG.getNode(
3114 RISCVISD::SETCC_VL, DL, MaskVT,
3115 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3116
3117 // Truncate to integer and convert back to FP.
3118 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3119 MVT XLenVT = Subtarget.getXLenVT();
3120 SDValue Truncated;
3121
3122 switch (Op.getOpcode()) {
3123 default:
3124 llvm_unreachable("Unexpected opcode");
3125 case ISD::STRICT_FCEIL:
3126 case ISD::STRICT_FFLOOR:
3127 case ISD::STRICT_FROUND:
3128 case ISD::STRICT_FROUNDEVEN: {
3129 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3130 assert(FRM != RISCVFPRndMode::Invalid);
3131 Truncated = DAG.getNode(
3132 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3133 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3134 break;
3135 }
3136 case ISD::STRICT_FTRUNC:
3137 Truncated =
3138 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3139 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3140 break;
3141 case ISD::STRICT_FNEARBYINT:
3142 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3143 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3144 Mask, VL);
3145 break;
3146 }
3147 Chain = Truncated.getValue(1);
3148
3149 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3150 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3151 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3152 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3153 Truncated, Mask, VL);
3154 Chain = Truncated.getValue(1);
3155 }
3156
3157 // Restore the original sign so that -0.0 is preserved.
3158 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3159 Src, Src, Mask, VL);
3160
3161 if (VT.isFixedLengthVector())
3162 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3163 return DAG.getMergeValues({Truncated, Chain}, DL);
3164}
3165
3166static SDValue
3167 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3168 const RISCVSubtarget &Subtarget) {
3169 MVT VT = Op.getSimpleValueType();
3170 if (VT.isVector())
3171 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3172
3173 if (DAG.shouldOptForSize())
3174 return SDValue();
3175
3176 SDLoc DL(Op);
3177 SDValue Src = Op.getOperand(0);
3178
3179 // Create an integer the size of the mantissa with the MSB set. This and all
3180 // values larger than it don't have any fractional bits so don't need to be
3181 // converted.
3182 const fltSemantics &FltSem = VT.getFltSemantics();
3183 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3184 APFloat MaxVal = APFloat(FltSem);
3185 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3186 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3187 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3188
3189 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3190 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3191 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3192}
3193
3194// Expand vector LRINT and LLRINT by converting to the integer domain.
3195 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3196 const RISCVSubtarget &Subtarget) {
3197 MVT VT = Op.getSimpleValueType();
3198 assert(VT.isVector() && "Unexpected type");
3199
3200 SDLoc DL(Op);
3201 SDValue Src = Op.getOperand(0);
3202 MVT ContainerVT = VT;
3203
3204 if (VT.isFixedLengthVector()) {
3205 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3206 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3207 }
3208
3209 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3210 SDValue Truncated =
3211 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3212
3213 if (!VT.isFixedLengthVector())
3214 return Truncated;
3215
3216 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3217}
3218
3219static SDValue
3220 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3221 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3222 SDValue Offset, SDValue Mask, SDValue VL,
3223 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3224 if (Passthru.isUndef())
3225 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3226 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3227 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3228 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3229}
3230
3231static SDValue
3232getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3233 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3234 SDValue VL,
3235 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3236 if (Passthru.isUndef())
3237 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3238 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3239 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3240 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3241}
3242
3243static MVT getLMUL1VT(MVT VT) {
3244 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3245 "Unexpected vector MVT");
3246 return MVT::getScalableVectorVT(
3247 VT.getVectorElementType(),
3248 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3249}
3250
3251 struct VIDSequence {
3252 int64_t StepNumerator;
3253 unsigned StepDenominator;
3254 int64_t Addend;
3255};
3256
3257static std::optional<APInt> getExactInteger(const APFloat &APF,
3258 unsigned BitWidth) {
3259 // We will use a SINT_TO_FP to materialize this constant so we should use a
3260 // signed APSInt here.
3261 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3262 // We use an arbitrary rounding mode here. If a floating-point is an exact
3263 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3264 // the rounding mode changes the output value, then it is not an exact
3265 // integer.
3266 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3267 bool IsExact;
3268 // If it is out of signed integer range, it will return an invalid operation.
3269 // If it is not an exact integer, IsExact is false.
3270 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3271 APFloatBase::opInvalidOp) ||
3272 !IsExact)
3273 return std::nullopt;
3274 return ValInt.extractBits(BitWidth, 0);
3275}
3276
3277// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3278// to the (non-zero) step S and start value X. This can be then lowered as the
3279// RVV sequence (VID * S) + X, for example.
3280// The step S is represented as an integer numerator divided by a positive
3281// denominator. Note that the implementation currently only identifies
3282// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3283// cannot detect 2/3, for example.
3284// Note that this method will also match potentially unappealing index
3285// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3286// determine whether this is worth generating code for.
3287//
3288// EltSizeInBits is the size of the type that the sequence will be calculated
3289// in, i.e. SEW for build_vectors or XLEN for address calculations.
3290static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3291 unsigned EltSizeInBits) {
3292 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3293 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3294 return std::nullopt;
3295 bool IsInteger = Op.getValueType().isInteger();
3296
3297 std::optional<unsigned> SeqStepDenom;
3298 std::optional<APInt> SeqStepNum;
3299 std::optional<APInt> SeqAddend;
3300 std::optional<std::pair<APInt, unsigned>> PrevElt;
3301 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3302
3303 // First extract the ops into a list of constant integer values. This may not
3304 // be possible for floats if they're not all representable as integers.
3305 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3306 const unsigned OpSize = Op.getScalarValueSizeInBits();
3307 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3308 if (Elt.isUndef()) {
3309 Elts[Idx] = std::nullopt;
3310 continue;
3311 }
3312 if (IsInteger) {
3313 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3314 } else {
3315 auto ExactInteger =
3316 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3317 if (!ExactInteger)
3318 return std::nullopt;
3319 Elts[Idx] = *ExactInteger;
3320 }
3321 }
3322
3323 for (auto [Idx, Elt] : enumerate(Elts)) {
3324 // Assume undef elements match the sequence; we just have to be careful
3325 // when interpolating across them.
3326 if (!Elt)
3327 continue;
3328
3329 if (PrevElt) {
3330 // Calculate the step since the last non-undef element, and ensure
3331 // it's consistent across the entire sequence.
3332 unsigned IdxDiff = Idx - PrevElt->second;
3333 APInt ValDiff = *Elt - PrevElt->first;
3334
3335 // A zero value difference means that we're somewhere in the middle
3336 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3337 // step change before evaluating the sequence.
3338 if (ValDiff == 0)
3339 continue;
3340
3341 int64_t Remainder = ValDiff.srem(IdxDiff);
3342 // Normalize the step if it's greater than 1.
3343 if (Remainder != ValDiff.getSExtValue()) {
3344 // The difference must cleanly divide the element span.
3345 if (Remainder != 0)
3346 return std::nullopt;
3347 ValDiff = ValDiff.sdiv(IdxDiff);
3348 IdxDiff = 1;
3349 }
3350
3351 if (!SeqStepNum)
3352 SeqStepNum = ValDiff;
3353 else if (ValDiff != SeqStepNum)
3354 return std::nullopt;
3355
3356 if (!SeqStepDenom)
3357 SeqStepDenom = IdxDiff;
3358 else if (IdxDiff != *SeqStepDenom)
3359 return std::nullopt;
3360 }
3361
3362 // Record this non-undef element for later.
3363 if (!PrevElt || PrevElt->first != *Elt)
3364 PrevElt = std::make_pair(*Elt, Idx);
3365 }
3366
3367 // We need to have logged a step for this to count as a legal index sequence.
3368 if (!SeqStepNum || !SeqStepDenom)
3369 return std::nullopt;
3370
3371 // Loop back through the sequence and validate elements we might have skipped
3372 // while waiting for a valid step. While doing this, log any sequence addend.
3373 for (auto [Idx, Elt] : enumerate(Elts)) {
3374 if (!Elt)
3375 continue;
3376 APInt ExpectedVal =
3377 (APInt(EltSizeInBits, Idx) * *SeqStepNum).sdiv(*SeqStepDenom);
3378
3379 APInt Addend = *Elt - ExpectedVal;
3380 if (!SeqAddend)
3381 SeqAddend = Addend;
3382 else if (Addend != SeqAddend)
3383 return std::nullopt;
3384 }
3385
3386 assert(SeqAddend && "Must have an addend if we have a step");
3387
3388 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3389 SeqAddend->getSExtValue()};
3390}
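// Illustrative matches for the helper above (inputs assumed): the constant
// vector <i32 1, 3, 5, 7> yields {StepNumerator = 2, StepDenominator = 1,
// Addend = 1}, i.e. (vid.v * 2) + 1, while <i32 0, 0, 1, 1> yields
// {1, 2, 0}, i.e. vid.v >> 1.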
3391
3392// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3393// and lower it as a VRGATHER_VX_VL from the source vector.
3394static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3395 SelectionDAG &DAG,
3396 const RISCVSubtarget &Subtarget) {
3397 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3398 return SDValue();
3399 SDValue Vec = SplatVal.getOperand(0);
3400 // Only perform this optimization on vectors of the same size for simplicity.
3401 // Don't perform this optimization for i1 vectors.
3402 // FIXME: Support i1 vectors, maybe by promoting to i8?
3403 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3404 return SDValue();
3405 SDValue Idx = SplatVal.getOperand(1);
3406 // The index must be a legal type.
3407 if (Idx.getValueType() != Subtarget.getXLenVT())
3408 return SDValue();
3409
3410 MVT ContainerVT = VT;
3411 if (VT.isFixedLengthVector()) {
3412 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3413 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3414 }
3415
3416 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3417
3418 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3419 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3420
3421 if (!VT.isFixedLengthVector())
3422 return Gather;
3423
3424 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3425}
3426
3427
3428/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3429/// which constitute a large proportion of the elements. In such cases we can
3430/// splat a vector with the dominant element and make up the shortfall with
3431/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3432/// Note that this includes vectors of 2 elements by association. The
3433/// upper-most element is the "dominant" one, allowing us to use a splat to
3434/// "insert" the upper element, and an insert of the lower element at position
3435/// 0, which improves codegen.
3436 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3437 const RISCVSubtarget &Subtarget) {
3438 MVT VT = Op.getSimpleValueType();
3439 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3440
3441 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3442
3443 SDLoc DL(Op);
3444 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3445
3446 MVT XLenVT = Subtarget.getXLenVT();
3447 unsigned NumElts = Op.getNumOperands();
3448
3449 SDValue DominantValue;
3450 unsigned MostCommonCount = 0;
3451 DenseMap<SDValue, unsigned> ValueCounts;
3452 unsigned NumUndefElts =
3453 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3454
3455 // Track the number of scalar loads we know we'd be inserting, estimated as
3456 // any non-zero floating-point constant. Other kinds of element are either
3457 // already in registers or are materialized on demand. The threshold at which
3458 // a vector load is more desirable than several scalar materialization and
3459 // vector-insertion instructions is not known.
3460 unsigned NumScalarLoads = 0;
3461
3462 for (SDValue V : Op->op_values()) {
3463 if (V.isUndef())
3464 continue;
3465
3466 ValueCounts.insert(std::make_pair(V, 0));
3467 unsigned &Count = ValueCounts[V];
3468 if (0 == Count)
3469 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3470 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3471
3472 // Is this value dominant? In case of a tie, prefer the highest element as
3473 // it's cheaper to insert near the beginning of a vector than it is at the
3474 // end.
3475 if (++Count >= MostCommonCount) {
3476 DominantValue = V;
3477 MostCommonCount = Count;
3478 }
3479 }
3480
3481 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3482 unsigned NumDefElts = NumElts - NumUndefElts;
3483 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3484
3485 // Don't perform this optimization when optimizing for size, since
3486 // materializing elements and inserting them tends to cause code bloat.
3487 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3488 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3489 ((MostCommonCount > DominantValueCountThreshold) ||
3490 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3491 // Start by splatting the most common element.
3492 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3493
3494 DenseSet<SDValue> Processed{DominantValue};
3495
3496 // We can handle an insert into the last element (of a splat) via
3497 // v(f)slide1down. This is slightly better than the vslideup insert
3498 // lowering as it avoids the need for a vector group temporary. It
3499 // is also better than using vmerge.vx as it avoids the need to
3500 // materialize the mask in a vector register.
3501 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3502 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3503 LastOp != DominantValue) {
3504 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3505 auto OpCode =
3506 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3507 if (!VT.isFloatingPoint())
3508 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3509 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3510 LastOp, Mask, VL);
3511 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3512 Processed.insert(LastOp);
3513 }
3514
3515 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3516 for (const auto &OpIdx : enumerate(Op->ops())) {
3517 const SDValue &V = OpIdx.value();
3518 if (V.isUndef() || !Processed.insert(V).second)
3519 continue;
3520 if (ValueCounts[V] == 1) {
3521 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3522 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3523 } else {
3524 // Blend in all instances of this value using a VSELECT, using a
3525 // mask where each bit signals whether that element is the one
3526 // we're after.
3527 SmallVector<SDValue> Ops;
3528 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3529 return DAG.getConstant(V == V1, DL, XLenVT);
3530 });
3531 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3532 DAG.getBuildVector(SelMaskTy, DL, Ops),
3533 DAG.getSplatBuildVector(VT, DL, V), Vec);
3534 }
3535 }
3536
3537 return Vec;
3538 }
3539
3540 return SDValue();
3541}
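// Illustrative case for the lowering above (assumed): v4i32 <7, 7, 7, 3> is
// built by splatting the dominant value 7 and then inserting the single
// odd-one-out 3; because 3 is the last element it is folded in with a
// v(f)slide1down rather than an insert, avoiding a mask materialization.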
3542
3543 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3544 const RISCVSubtarget &Subtarget) {
3545 MVT VT = Op.getSimpleValueType();
3546 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3547
3548 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3549
3550 SDLoc DL(Op);
3551 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3552
3553 MVT XLenVT = Subtarget.getXLenVT();
3554 unsigned NumElts = Op.getNumOperands();
3555
3556 if (VT.getVectorElementType() == MVT::i1) {
3557 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3558 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3559 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3560 }
3561
3562 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3563 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3564 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3565 }
3566
3567 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3568 // scalar integer chunks whose bit-width depends on the number of mask
3569 // bits and XLEN.
3570 // First, determine the most appropriate scalar integer type to use. This
3571 // is at most XLenVT, but may be shrunk to a smaller vector element type
3572 // according to the size of the final vector - use i8 chunks rather than
3573 // XLenVT if we're producing a v8i1. This results in more consistent
3574 // codegen across RV32 and RV64.
3575 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3576 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3577 // If we have to use more than one INSERT_VECTOR_ELT then this
3578 // optimization is likely to increase code size; avoid performing it in
3579 // such a case. We can use a load from a constant pool in this case.
3580 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3581 return SDValue();
3582 // Now we can create our integer vector type. Note that it may be larger
3583 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3584 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3585 MVT IntegerViaVecVT =
3586 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3587 IntegerViaVecElts);
3588
3589 uint64_t Bits = 0;
3590 unsigned BitPos = 0, IntegerEltIdx = 0;
3591 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3592
3593 for (unsigned I = 0; I < NumElts;) {
3594 SDValue V = Op.getOperand(I);
3595 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3596 Bits |= ((uint64_t)BitValue << BitPos);
3597 ++BitPos;
3598 ++I;
3599
3600 // Once we accumulate enough bits to fill our scalar type or process the
3601 // last element, insert into our vector and clear our accumulated data.
3602 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3603 if (NumViaIntegerBits <= 32)
3604 Bits = SignExtend64<32>(Bits);
3605 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3606 Elts[IntegerEltIdx] = Elt;
3607 Bits = 0;
3608 BitPos = 0;
3609 IntegerEltIdx++;
3610 }
3611 }
3612
3613 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3614
3615 if (NumElts < NumViaIntegerBits) {
3616 // If we're producing a smaller vector than our minimum legal integer
3617 // type, bitcast to the equivalent (known-legal) mask type, and extract
3618 // our final mask.
3619 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3620 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3621 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3622 DAG.getConstant(0, DL, XLenVT));
3623 } else {
3624 // Else we must have produced an integer type with the same size as the
3625 // mask type; bitcast for the final result.
3626 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3627 Vec = DAG.getBitcast(VT, Vec);
3628 }
3629
3630 return Vec;
3631 }
3632
3633 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3634 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3635 : RISCVISD::VMV_V_X_VL;
3636 if (!VT.isFloatingPoint())
3637 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3638 Splat =
3639 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3640 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3641 }
3642
3643 // Try and match index sequences, which we can lower to the vid instruction
3644 // with optional modifications. An all-undef vector is matched by
3645 // getSplatValue, above.
3646 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3647 int64_t StepNumerator = SimpleVID->StepNumerator;
3648 unsigned StepDenominator = SimpleVID->StepDenominator;
3649 int64_t Addend = SimpleVID->Addend;
3650
3651 assert(StepNumerator != 0 && "Invalid step");
3652 bool Negate = false;
3653 int64_t SplatStepVal = StepNumerator;
3654 unsigned StepOpcode = ISD::MUL;
3655 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3656 // anyway as the shift of 63 won't fit in uimm5.
3657 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3658 isPowerOf2_64(std::abs(StepNumerator))) {
3659 Negate = StepNumerator < 0;
3660 StepOpcode = ISD::SHL;
3661 SplatStepVal = Log2_64(std::abs(StepNumerator));
3662 }
3663
3664 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3665 // threshold since it's the immediate value many RVV instructions accept.
3666 // There is no vmul.vi instruction so ensure the multiply constant can fit
3667 // in a single addi instruction.
3668 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3669 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3670 isPowerOf2_32(StepDenominator) &&
3671 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3672 MVT VIDVT =
3673 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3674 MVT VIDContainerVT =
3675 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3676 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3677 // Convert right out of the scalable type so we can use standard ISD
3678 // nodes for the rest of the computation. If we used scalable types with
3679 // these, we'd lose the fixed-length vector info and generate worse
3680 // vsetvli code.
3681 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3682 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3683 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3684 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3685 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3686 }
3687 if (StepDenominator != 1) {
3688 SDValue SplatStep =
3689 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3690 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3691 }
3692 if (Addend != 0 || Negate) {
3693 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3694 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3695 VID);
3696 }
3697 if (VT.isFloatingPoint()) {
3698 // TODO: Use vfwcvt to reduce register pressure.
3699 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3700 }
3701 return VID;
3702 }
3703 }
3704
3705 // For very small build_vectors, use a single scalar insert of a constant.
3706 // TODO: Base this on constant rematerialization cost, not size.
3707 const unsigned EltBitSize = VT.getScalarSizeInBits();
3708 if (VT.getSizeInBits() <= 32 &&
3709 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3710 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3711 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3712 "Unexpected sequence type");
3713 // If we can use the original VL with the modified element type, this
3714 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3715 // be moved into InsertVSETVLI?
3716 unsigned ViaVecLen =
3717 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3718 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3719
3720 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3721 uint64_t SplatValue = 0;
3722 // Construct the amalgamated value at this larger vector type.
3723 for (const auto &OpIdx : enumerate(Op->op_values())) {
3724 const auto &SeqV = OpIdx.value();
3725 if (!SeqV.isUndef())
3726 SplatValue |=
3727 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3728 }
3729
3730 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3731 // achieve better constant materialization.
3732 // On RV32, we need to sign-extend to use getSignedConstant.
3733 if (ViaIntVT == MVT::i32)
3734 SplatValue = SignExtend64<32>(SplatValue);
3735
3736 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3737 DAG.getUNDEF(ViaVecVT),
3738 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3739 DAG.getVectorIdxConstant(0, DL));
3740 if (ViaVecLen != 1)
3741 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3742 MVT::getVectorVT(ViaIntVT, 1), Vec,
3743 DAG.getConstant(0, DL, XLenVT));
3744 return DAG.getBitcast(VT, Vec);
3745 }
3746
3747
3748 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3749 // when re-interpreted as a vector with a larger element type. For example,
3750 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3751 // could be instead splat as
3752 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3753 // TODO: This optimization could also work on non-constant splats, but it
3754 // would require bit-manipulation instructions to construct the splat value.
3755 SmallVector<SDValue> Sequence;
3756 const auto *BV = cast<BuildVectorSDNode>(Op);
3757 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3758 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3759 BV->getRepeatedSequence(Sequence) &&
3760 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3761 unsigned SeqLen = Sequence.size();
3762 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3763 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3764 ViaIntVT == MVT::i64) &&
3765 "Unexpected sequence type");
3766
3767 // If we can use the original VL with the modified element type, this
3768 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3769 // be moved into InsertVSETVLI?
3770 const unsigned RequiredVL = NumElts / SeqLen;
3771 const unsigned ViaVecLen =
3772 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3773 NumElts : RequiredVL;
3774 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3775
3776 unsigned EltIdx = 0;
3777 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3778 uint64_t SplatValue = 0;
3779 // Construct the amalgamated value which can be splatted as this larger
3780 // vector type.
3781 for (const auto &SeqV : Sequence) {
3782 if (!SeqV.isUndef())
3783 SplatValue |=
3784 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3785 EltIdx++;
3786 }
3787
3788 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3789 // achieve better constant materialization.
3790 // On RV32, we need to sign-extend to use getSignedConstant.
3791 if (ViaIntVT == MVT::i32)
3792 SplatValue = SignExtend64<32>(SplatValue);
3793
3794 // Since we can't introduce illegal i64 types at this stage, we can only
3795 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3796 // way we can use RVV instructions to splat.
3797 assert((ViaIntVT.bitsLE(XLenVT) ||
3798 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3799 "Unexpected bitcast sequence");
3800 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3801 SDValue ViaVL =
3802 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3803 MVT ViaContainerVT =
3804 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3805 SDValue Splat =
3806 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3807 DAG.getUNDEF(ViaContainerVT),
3808 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3809 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3810 if (ViaVecLen != RequiredVL)
3811 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3812 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3813 DAG.getConstant(0, DL, XLenVT));
3814 return DAG.getBitcast(VT, Splat);
3815 }
3816 }
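// Example (illustrative): for the v4i16 <0, 1, 0, 1> case above, Sequence
// is {0, 1}, ViaIntVT is i32 and SplatValue becomes 0x00010000, which is
// splatted with vmv.v.x and bitcast back to v4i16.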
3817
3818 // If the number of signbits allows, see if we can lower as a <N x i8>.
3819 // Our main goal here is to reduce LMUL (and thus work) required to
3820 // build the constant, but we will also narrow if the resulting
3821 // narrow vector is known to materialize cheaply.
3822 // TODO: We really should be costing the smaller vector. There are
3823 // profitable cases this misses.
3824 if (EltBitSize > 8 && VT.isInteger() &&
3825 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3826 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3827 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3828 DL, Op->ops());
3829 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3830 Source, DAG, Subtarget);
3831 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3832 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3833 }
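// Example (illustrative, assuming a 128-bit minimum VLEN): a v16i16
// constant whose elements all fit in 8 signed bits is built as a v16i8
// constant and widened back with a vsext, halving the LMUL (and work)
// needed to materialize it.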
3834
3835 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3836 return Res;
3837
3838 // For constant vectors, use generic constant pool lowering. Otherwise,
3839 // we'd have to materialize constants in GPRs just to move them into the
3840 // vector.
3841 return SDValue();
3842}
3843
3844static unsigned getPACKOpcode(unsigned DestBW,
3845 const RISCVSubtarget &Subtarget) {
3846 switch (DestBW) {
3847 default:
3848 llvm_unreachable("Unsupported pack size");
3849 case 16:
3850 return RISCV::PACKH;
3851 case 32:
3852 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3853 case 64:
3854 assert(Subtarget.is64Bit());
3855 return RISCV::PACK;
3856 }
3857}
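// For reference (summarized from the Zbkb specification): PACKH
// concatenates the low bytes of its two sources, PACK concatenates the low
// XLEN/2 bits, and PACKW (RV64 only) concatenates the low 16 bits of each
// source into a sign-extended 32-bit result.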
3858
3859/// Double the element size of the build vector to reduce the number
3860/// of vslide1down in the build vector chain. In the worst case, this
3861/// trades three scalar operations for 1 vector operation. Scalar
3862/// operations are generally lower latency, and for out-of-order cores
3863/// we also benefit from additional parallelism.
3864 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3865 const RISCVSubtarget &Subtarget) {
3866 SDLoc DL(Op);
3867 MVT VT = Op.getSimpleValueType();
3868 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3869 MVT ElemVT = VT.getVectorElementType();
3870 if (!ElemVT.isInteger())
3871 return SDValue();
3872
3873 // TODO: Relax these architectural restrictions, possibly with costing
3874 // of the actual instructions required.
3875 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3876 return SDValue();
3877
3878 unsigned NumElts = VT.getVectorNumElements();
3879 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
3880 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
3881 NumElts % 2 != 0)
3882 return SDValue();
3883
3884 // Produce [B,A] packed into a type twice as wide. Note that all
3885 // scalars are XLenVT, possibly masked (see below).
3886 MVT XLenVT = Subtarget.getXLenVT();
3887 SDValue Mask = DAG.getConstant(
3888 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
3889 auto pack = [&](SDValue A, SDValue B) {
3890 // Bias the scheduling of the inserted operations to near the
3891 // definition of the element - this tends to reduce register
3892 // pressure overall.
3893 SDLoc ElemDL(B);
3894 if (Subtarget.hasStdExtZbkb())
3895 // Note that we're relying on the high bits of the result being
3896 // don't care. For PACKW, the result is *sign* extended.
3897 return SDValue(
3898 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
3899 ElemDL, XLenVT, A, B),
3900 0);
3901
3902 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
3903 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
3904 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
3905 SDNodeFlags Flags;
3906 Flags.setDisjoint(true);
3907 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
3908 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt), Flags);
3909 };
3910
3911 SmallVector<SDValue> NewOperands;
3912 NewOperands.reserve(NumElts / 2);
3913 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
3914 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
3915 assert(NumElts == NewOperands.size() * 2);
3916 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
3917 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
3918 return DAG.getNode(ISD::BITCAST, DL, VT,
3919 DAG.getBuildVector(WideVecVT, DL, NewOperands));
3920}
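// Example (illustrative): on RV64 with Zbb, Zba and Zbkb, a v8i8
// build_vector of scalar values is rewritten as a v4i16 build_vector whose
// elements are packh of adjacent pairs, halving the number of vslide1down
// steps in the final chain.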
3921
3922 // Convert a vXf16 build_vector to a vXi16 build_vector with bitcasts.
3923 static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {
3924 MVT VT = Op.getSimpleValueType();
3925 MVT IVT = VT.changeVectorElementType(MVT::i16);
3926 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
3927 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
3928 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
3929 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);
3930 return DAG.getBitcast(VT, Res);
3931}
3932
3933 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3934 const RISCVSubtarget &Subtarget) {
3935 MVT VT = Op.getSimpleValueType();
3936 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3937
3938 // If we don't have scalar f16, we need to bitcast to an i16 vector.
3939 if (VT.getVectorElementType() == MVT::f16 &&
3940 !Subtarget.hasStdExtZfhmin())
3941 return lowerBUILD_VECTORvXf16(Op, DAG);
3942
3943 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3944 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3945 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3946
3947 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3948
3949 SDLoc DL(Op);
3950 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3951
3952 MVT XLenVT = Subtarget.getXLenVT();
3953
3954 if (VT.getVectorElementType() == MVT::i1) {
3955 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3956 // vector type, we have a legal equivalently-sized i8 type, so we can use
3957 // that.
3958 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3959 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3960
3961 SDValue WideVec;
3962 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3963 // For a splat, perform a scalar truncate before creating the wider
3964 // vector.
3965 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3966 DAG.getConstant(1, DL, Splat.getValueType()));
3967 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3968 } else {
3969 SmallVector<SDValue, 8> Ops(Op->op_values());
3970 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3971 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3972 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3973 }
3974
3975 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3976 }
3977
3978 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3979 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3980 return Gather;
3981 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3982 : RISCVISD::VMV_V_X_VL;
3983 if (!VT.isFloatingPoint())
3984 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3985 Splat =
3986 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3987 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3988 }
3989
3990 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3991 return Res;
3992
3993 // If we're compiling for an exact VLEN value, we can split our work per
3994 // register in the register group.
3995 if (const auto VLen = Subtarget.getRealVLen();
3996 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3997 MVT ElemVT = VT.getVectorElementType();
3998 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3999 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4000 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4001 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4002 assert(M1VT == getLMUL1VT(M1VT));
4003
4004 // The following semantically builds up a fixed length concat_vector
4005 // of the component build_vectors. We eagerly lower to scalable and
4006 // insert_subvector here to avoid DAG combining it back to a large
4007 // build_vector.
4008 SmallVector<SDValue> BuildVectorOps(Op->ops());
4009 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4010 SDValue Vec = DAG.getUNDEF(ContainerVT);
4011 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4012 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4013 SDValue SubBV =
4014 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4015 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4016 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4017 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4018 DAG.getVectorIdxConstant(InsertIdx, DL));
4019 }
4020 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4021 }
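// Example (illustrative, assuming VLEN=128): a v8i32 build_vector spans two
// vector registers, so it is built as two v4i32 build_vectors, each
// converted to a scalable M1 value and inserted into its own register's
// slot of the container.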
4022
4023 // If we're about to resort to vslide1down (or stack usage), pack our
4024 // elements into the widest scalar type we can. This will force a VL/VTYPE
4025 // toggle, but reduces the critical path, the number of vslide1down ops
4026 // required, and possibly enables scalar folds of the values.
4027 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4028 return Res;
4029
4030 // For m1 vectors, if we have non-undef values in both halves of our vector,
4031 // split the vector into low and high halves, build them separately, then
4032 // use a vselect to combine them. For long vectors, this cuts the critical
4033 // path of the vslide1down sequence in half, and gives us an opportunity
4034 // to special case each half independently. Note that we don't change the
4035 // length of the sub-vectors here, so if both fall back to the generic
4036 // vslide1down path, we should be able to fold the vselect into the final
4037 // vslidedown (for the undef tail) for the first half w/ masking.
4038 unsigned NumElts = VT.getVectorNumElements();
4039 unsigned NumUndefElts =
4040 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4041 unsigned NumDefElts = NumElts - NumUndefElts;
4042 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4043 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4044 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4045 SmallVector<SDValue> MaskVals;
4046 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4047 SubVecAOps.reserve(NumElts);
4048 SubVecBOps.reserve(NumElts);
4049 for (unsigned i = 0; i < NumElts; i++) {
4050 SDValue Elem = Op->getOperand(i);
4051 if (i < NumElts / 2) {
4052 SubVecAOps.push_back(Elem);
4053 SubVecBOps.push_back(UndefElem);
4054 } else {
4055 SubVecAOps.push_back(UndefElem);
4056 SubVecBOps.push_back(Elem);
4057 }
4058 bool SelectMaskVal = (i < NumElts / 2);
4059 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4060 }
4061 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4062 MaskVals.size() == NumElts);
4063
4064 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4065 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4066 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4067 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4068 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4069 }
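// Example (illustrative): a fully defined v16i8 build_vector is rebuilt as
// two build_vectors, one defining only the low 8 lanes and one only the
// high 8, and the halves are recombined with a vselect whose mask selects
// the low half from the first operand.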
4070
4071 // Cap the cost at a value linear to the number of elements in the vector.
4072 // The default lowering is to use the stack. The vector store + scalar loads
4073 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4074 // being (at least) linear in LMUL. As a result, using the vslidedown
4075 // lowering for every element ends up being VL*LMUL.
4076 // TODO: Should we be directly costing the stack alternative? Doing so might
4077 // give us a more accurate upper bound.
4078 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4079
4080 // TODO: unify with TTI getSlideCost.
4081 InstructionCost PerSlideCost = 1;
4082 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4083 default: break;
4084 case RISCVII::VLMUL::LMUL_2:
4085 PerSlideCost = 2;
4086 break;
4087 case RISCVII::VLMUL::LMUL_4:
4088 PerSlideCost = 4;
4089 break;
4090 case RISCVII::VLMUL::LMUL_8:
4091 PerSlideCost = 8;
4092 break;
4093 }
4094
4095 // TODO: Should we be using the build instseq then cost + evaluate scheme
4096 // we use for integer constants here?
4097 unsigned UndefCount = 0;
4098 for (const SDValue &V : Op->ops()) {
4099 if (V.isUndef()) {
4100 UndefCount++;
4101 continue;
4102 }
4103 if (UndefCount) {
4104 LinearBudget -= PerSlideCost;
4105 UndefCount = 0;
4106 }
4107 LinearBudget -= PerSlideCost;
4108 }
4109 if (UndefCount) {
4110 LinearBudget -= PerSlideCost;
4111 }
4112
4113 if (LinearBudget < 0)
4114 return SDValue();
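// Example (illustrative budget check): for a v8i32 at LMUL=2 the budget is
// 8 * 2 = 16 and, assuming the LMUL_2 case above, each defined element (or
// run of undefs) costs 2, so a fully defined vector just fits and still
// uses the vslide1down lowering below.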
4115
4116 assert((!VT.isFloatingPoint() ||
4117 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4118 "Illegal type which will result in reserved encoding");
4119
4120 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4121
4122 SDValue Vec;
4123 UndefCount = 0;
4124 for (SDValue V : Op->ops()) {
4125 if (V.isUndef()) {
4126 UndefCount++;
4127 continue;
4128 }
4129
4130 // Start our sequence with a TA splat in the hopes that hardware is able to
4131 // recognize there's no dependency on the prior value of our temporary
4132 // register.
4133 if (!Vec) {
4134 Vec = DAG.getSplatVector(VT, DL, V);
4135 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4136 UndefCount = 0;
4137 continue;
4138 }
4139
4140 if (UndefCount) {
4141 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4142 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4143 Vec, Offset, Mask, VL, Policy);
4144 UndefCount = 0;
4145 }
4146 auto OpCode =
4147 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4148 if (!VT.isFloatingPoint())
4149 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4150 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4151 V, Mask, VL);
4152 }
4153 if (UndefCount) {
4154 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4155 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4156 Vec, Offset, Mask, VL, Policy);
4157 }
4158 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4159}
4160
4161static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4162 SDValue Lo, SDValue Hi, SDValue VL,
4163 SelectionDAG &DAG) {
4164 if (!Passthru)
4165 Passthru = DAG.getUNDEF(VT);
4166 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4167 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4168 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4169 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4170 // node in order to try and match RVV vector/scalar instructions.
4171 if ((LoC >> 31) == HiC)
4172 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4173
4174 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4175 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4176 // vlmax vsetvli or vsetivli to change the VL.
4177 // FIXME: Support larger constants?
4178 // FIXME: Support non-constant VLs by saturating?
4179 if (LoC == HiC) {
4180 SDValue NewVL;
4181 if (isAllOnesConstant(VL) ||
4182 (isa<RegisterSDNode>(VL) &&
4183 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4184 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4185 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4186 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4187
4188 if (NewVL) {
4189 MVT InterVT =
4190 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4191 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4192 DAG.getUNDEF(InterVT), Lo, NewVL);
4193 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4194 }
4195 }
4196 }
4197
4198 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4199 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4200 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4201 Hi.getConstantOperandVal(1) == 31)
4202 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4203
4204 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4205 // even if it might be sign extended.
4206 if (Hi.isUndef())
4207 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4208
4209 // Fall back to a stack store and stride x0 vector load.
4210 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4211 Hi, VL);
4212}
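// Examples (illustrative): splatting the i64 constant -1 on RV32 has
// (LoC >> 31) == HiC, so it becomes a single vmv.v.x of Lo, while an
// arbitrary pair such as Lo=2, Hi=1 falls through to the
// SPLAT_VECTOR_SPLIT_I64_VL stack-based path.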
4213
4214// Called by type legalization to handle splat of i64 on RV32.
4215// FIXME: We can optimize this when the type has sign or zero bits in one
4216// of the halves.
4217static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4218 SDValue Scalar, SDValue VL,
4219 SelectionDAG &DAG) {
4220 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4221 SDValue Lo, Hi;
4222 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4223 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4224}
4225
4226// This function lowers a splat of a scalar operand Splat with the vector
4227// length VL. It ensures the final sequence is type legal, which is useful when
4228// lowering a splat after type legalization.
4229static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4230 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4231 const RISCVSubtarget &Subtarget) {
4232 bool HasPassthru = Passthru && !Passthru.isUndef();
4233 if (!HasPassthru && !Passthru)
4234 Passthru = DAG.getUNDEF(VT);
4235 if (VT.isFloatingPoint())
4236 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4237
4238 MVT XLenVT = Subtarget.getXLenVT();
4239
4240 // Simplest case is that the operand needs to be promoted to XLenVT.
4241 if (Scalar.getValueType().bitsLE(XLenVT)) {
4242 // If the operand is a constant, sign extend to increase our chances
4243 // of being able to use a .vi instruction. ANY_EXTEND would become
4244 // a zero extend and the simm5 check in isel would fail.
4245 // FIXME: Should we ignore the upper bits in isel instead?
4246 unsigned ExtOpc =
4247 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4248 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4249 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4250 }
4251
4252 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4253 "Unexpected scalar for splat lowering!");
4254
4255 if (isOneConstant(VL) && isNullConstant(Scalar))
4256 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4257 DAG.getConstant(0, DL, XLenVT), VL);
4258
4259 // Otherwise use the more complicated splatting algorithm.
4260 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4261}
4262
4263// This function lowers an insert of a scalar operand Scalar into lane
4264// 0 of the vector regardless of the value of VL. The contents of the
4265// remaining lanes of the result vector are unspecified. VL is assumed
4266// to be non-zero.
4267 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4268 const SDLoc &DL, SelectionDAG &DAG,
4269 const RISCVSubtarget &Subtarget) {
4270 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4271
4272 const MVT XLenVT = Subtarget.getXLenVT();
4273 SDValue Passthru = DAG.getUNDEF(VT);
4274
4275 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4276 isNullConstant(Scalar.getOperand(1))) {
4277 SDValue ExtractedVal = Scalar.getOperand(0);
4278 // The element types must be the same.
4279 if (ExtractedVal.getValueType().getVectorElementType() ==
4280 VT.getVectorElementType()) {
4281 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4282 MVT ExtractedContainerVT = ExtractedVT;
4283 if (ExtractedContainerVT.isFixedLengthVector()) {
4284 ExtractedContainerVT = getContainerForFixedLengthVector(
4285 DAG, ExtractedContainerVT, Subtarget);
4286 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4287 ExtractedVal, DAG, Subtarget);
4288 }
4289 if (ExtractedContainerVT.bitsLE(VT))
4290 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4291 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4292 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4293 DAG.getVectorIdxConstant(0, DL));
4294 }
4295 }
4296
4297
4298 if (VT.isFloatingPoint())
4299 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4300 DAG.getUNDEF(VT), Scalar, VL);
4301
4302 // Avoid the tricky legalization cases by falling back to using the
4303 // splat code which already handles it gracefully.
4304 if (!Scalar.getValueType().bitsLE(XLenVT))
4305 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4306 DAG.getConstant(1, DL, XLenVT),
4307 VT, DL, DAG, Subtarget);
4308
4309 // If the operand is a constant, sign extend to increase our chances
4310 // of being able to use a .vi instruction. ANY_EXTEND would become
4311 // a zero extend and the simm5 check in isel would fail.
4312 // FIXME: Should we ignore the upper bits in isel instead?
4313 unsigned ExtOpc =
4314 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4315 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4316 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4317 DAG.getUNDEF(VT), Scalar, VL);
4318}
4319
4320 // Is this a shuffle that extracts either the even or odd elements of a vector?
4321// That is, specifically, either (a) or (b) below.
4322// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4323// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4324// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4325// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4326 // Returns {Src Vector, Even Elements} on success.
4327static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4328 SDValue V2, ArrayRef<int> Mask,
4329 const RISCVSubtarget &Subtarget) {
4330 // Need to be able to widen the vector.
4331 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4332 return false;
4333
4334 // Both inputs must be extracts.
4335 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4336 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4337 return false;
4338
4339 // Extracting from the same source.
4340 SDValue Src = V1.getOperand(0);
4341 if (Src != V2.getOperand(0))
4342 return false;
4343
4344 // Src needs to have twice the number of elements.
4345 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4346 return false;
4347
4348 // The extracts must extract the two halves of the source.
4349 if (V1.getConstantOperandVal(1) != 0 ||
4350 V2.getConstantOperandVal(1) != Mask.size())
4351 return false;
4352
4353 // First index must be the first even or odd element from V1.
4354 if (Mask[0] != 0 && Mask[0] != 1)
4355 return false;
4356
4357 // The others must increase by 2 each time.
4358 // TODO: Support undef elements?
4359 for (unsigned i = 1; i != Mask.size(); ++i)
4360 if (Mask[i] != Mask[i - 1] + 2)
4361 return false;
4362
4363 return true;
4364}
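// Example (illustrative): with a v16i8 source split by two v8i8
// extract_subvectors, the mask <0,2,4,6,8,10,12,14> selects the even
// elements (Mask[0] == 0) and <1,3,5,7,9,11,13,15> the odd ones; both are
// later lowered via getDeinterleaveViaVNSRL.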
4365
4366/// Is this shuffle interleaving contiguous elements from one vector into the
4367/// even elements and contiguous elements from another vector into the odd
4368/// elements. \p EvenSrc will contain the element that should be in the first
4369/// even element. \p OddSrc will contain the element that should be in the first
4370/// odd element. These can be the first element in a source or the element half
4371/// way through the source.
4372static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4373 int &OddSrc, const RISCVSubtarget &Subtarget) {
4374 // We need to be able to widen elements to the next larger integer type.
4375 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4376 return false;
4377
4378 int Size = Mask.size();
4379 int NumElts = VT.getVectorNumElements();
4380 assert(Size == (int)NumElts && "Unexpected mask size");
4381
4382 SmallVector<unsigned, 2> StartIndexes;
4383 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4384 return false;
4385
4386 EvenSrc = StartIndexes[0];
4387 OddSrc = StartIndexes[1];
4388
4389 // One source should be low half of first vector.
4390 if (EvenSrc != 0 && OddSrc != 0)
4391 return false;
4392
4393 // Subvectors will be extracted from either the start of the two input
4394 // vectors, or from the start and middle of the first vector if it's a unary
4395 // interleave.
4396 // In both cases, HalfNumElts will be extracted.
4397 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4398 // we'll create an illegal extract_subvector.
4399 // FIXME: We could support other values using a slidedown first.
4400 int HalfNumElts = NumElts / 2;
4401 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4402}
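// Example (illustrative): for two v8i8 inputs, the mask <0,8,1,9,2,10,3,11>
// interleaves the low halves of V1 and V2; EvenSrc=0 and OddSrc=8 are both
// multiples of HalfNumElts=4, so the shuffle is accepted and later lowered
// with getWideningInterleave.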
4403
4404/// Match shuffles that concatenate two vectors, rotate the concatenation,
4405/// and then extract the original number of elements from the rotated result.
4406/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4407/// returned rotation amount is for a rotate right, where elements move from
4408/// higher elements to lower elements. \p LoSrc indicates the first source
4409/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4410/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4411/// 0 or 1 if a rotation is found.
4412///
4413/// NOTE: We talk about rotate to the right which matches how bit shift and
4414/// rotate instructions are described where LSBs are on the right, but LLVM IR
4415/// and the table below write vectors with the lowest elements on the left.
4416static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4417 int Size = Mask.size();
4418
4419 // We need to detect various ways of spelling a rotation:
4420 // [11, 12, 13, 14, 15, 0, 1, 2]
4421 // [-1, 12, 13, 14, -1, -1, 1, -1]
4422 // [-1, -1, -1, -1, -1, -1, 1, 2]
4423 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4424 // [-1, 4, 5, 6, -1, -1, 9, -1]
4425 // [-1, 4, 5, 6, -1, -1, -1, -1]
4426 int Rotation = 0;
4427 LoSrc = -1;
4428 HiSrc = -1;
4429 for (int i = 0; i != Size; ++i) {
4430 int M = Mask[i];
4431 if (M < 0)
4432 continue;
4433
4434 // Determine where a rotate vector would have started.
4435 int StartIdx = i - (M % Size);
4436 // The identity rotation isn't interesting, stop.
4437 if (StartIdx == 0)
4438 return -1;
4439
4440 // If we found the tail of a vector the rotation must be the missing
4441 // front. If we found the head of a vector, it must be how much of the
4442 // head.
4443 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4444
4445 if (Rotation == 0)
4446 Rotation = CandidateRotation;
4447 else if (Rotation != CandidateRotation)
4448 // The rotations don't match, so we can't match this mask.
4449 return -1;
4450
4451 // Compute which value this mask is pointing at.
4452 int MaskSrc = M < Size ? 0 : 1;
4453
4454 // Compute which of the two target values this index should be assigned to.
4455 // This reflects whether the high elements are remaining or the low elements
4456 // are remaining.
4457 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4458
4459 // Either set up this value if we've not encountered it before, or check
4460 // that it remains consistent.
4461 if (TargetSrc < 0)
4462 TargetSrc = MaskSrc;
4463 else if (TargetSrc != MaskSrc)
4464 // This may be a rotation, but it pulls from the inputs in some
4465 // unsupported interleaving.
4466 return -1;
4467 }
4468
4469 // Check that we successfully analyzed the mask, and normalize the results.
4470 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4471 assert((LoSrc >= 0 || HiSrc >= 0) &&
4472 "Failed to find a rotated input vector!");
4473
4474 return Rotation;
4475}
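// Worked example (illustrative): for the first mask listed above,
// [11, 12, 13, 14, 15, 0, 1, 2] with Size=8, this returns Rotation=3 with
// HiSrc=1 and LoSrc=0, i.e. a rotate right by 3 of the concatenation of the
// two sources.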
4476
4477// Lower a deinterleave shuffle to vnsrl.
4478// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4479// -> [p, q, r, s] (EvenElts == false)
4480// VT is the type of the vector to return, <[vscale x ]n x ty>
4481// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4482 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4483 bool EvenElts,
4484 const RISCVSubtarget &Subtarget,
4485 SelectionDAG &DAG) {
4486 // The result is a vector of type <m x n x ty>
4487 MVT ContainerVT = VT;
4488 // Convert fixed vectors to scalable if needed
4489 if (ContainerVT.isFixedLengthVector()) {
4490 assert(Src.getSimpleValueType().isFixedLengthVector());
4491 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4492
4493 // The source is a vector of type <m x n*2 x ty>
4494 MVT SrcContainerVT =
4495 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4496 ContainerVT.getVectorElementCount() * 2);
4497 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4498 }
4499
4500 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4501
4502 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4503 // This also converts FP to int.
4504 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4505 MVT WideSrcContainerVT = MVT::getVectorVT(
4506 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4507 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4508
4509 // The integer version of the container type.
4510 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4511
4512 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4513 // the original element size.
4514 unsigned Shift = EvenElts ? 0 : EltBits;
4515 SDValue SplatShift = DAG.getNode(
4516 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4517 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4518 SDValue Res =
4519 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4520 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4521 // Cast back to FP if needed.
4522 Res = DAG.getBitcast(ContainerVT, Res);
4523
4524 if (VT.isFixedLengthVector())
4525 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4526 return Res;
4527}
4528
4529// Lower the following shuffle to vslidedown.
4530// a)
4531// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4532// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4533// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4534// b)
4535// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4536// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4537// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4538// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4539// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4540// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4541 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4542 SDValue V1, SDValue V2,
4543 ArrayRef<int> Mask,
4544 const RISCVSubtarget &Subtarget,
4545 SelectionDAG &DAG) {
4546 auto findNonEXTRACT_SUBVECTORParent =
4547 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4548 uint64_t Offset = 0;
4549 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4550 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4551 // a scalable vector. But we don't want to match the case.
4552 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4553 Offset += Parent.getConstantOperandVal(1);
4554 Parent = Parent.getOperand(0);
4555 }
4556 return std::make_pair(Parent, Offset);
4557 };
4558
4559 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4560 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4561
4562 // Extracting from the same source.
4563 SDValue Src = V1Src;
4564 if (Src != V2Src)
4565 return SDValue();
4566
4567 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4568 SmallVector<int, 16> NewMask(Mask);
4569 for (size_t i = 0; i != NewMask.size(); ++i) {
4570 if (NewMask[i] == -1)
4571 continue;
4572
4573 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4574 NewMask[i] = NewMask[i] + V1IndexOffset;
4575 } else {
4576 // Subtracting NewMask.size() is needed; otherwise, case (b) above would be
4577 // <5,6,7,12> instead of <5,6,7,8>.
4578 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4579 }
4580 }
4581
4582 // First index must be known and non-zero. It will be used as the slidedown
4583 // amount.
4584 if (NewMask[0] <= 0)
4585 return SDValue();
4586
4587 // NewMask must also be contiguous (consecutive indices).
4588 for (unsigned i = 1; i != NewMask.size(); ++i)
4589 if (NewMask[i - 1] + 1 != NewMask[i])
4590 return SDValue();
4591
4592 MVT XLenVT = Subtarget.getXLenVT();
4593 MVT SrcVT = Src.getSimpleValueType();
4594 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4595 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4596 SDValue Slidedown =
4597 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4598 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4599 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4600 return DAG.getNode(
4601 ISD::EXTRACT_SUBVECTOR, DL, VT,
4602 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4603 DAG.getConstant(0, DL, XLenVT));
4604}
4605
4606// Because vslideup leaves the destination elements at the start intact, we can
4607// use it to perform shuffles that insert subvectors:
4608//
4609// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4610// ->
4611// vsetvli zero, 8, e8, mf2, ta, ma
4612// vslideup.vi v8, v9, 4
4613//
4614// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4615// ->
4616// vsetvli zero, 5, e8, mf2, tu, ma
4617 // vslideup.vi v8, v9, 2
4618 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4619 SDValue V1, SDValue V2,
4620 ArrayRef<int> Mask,
4621 const RISCVSubtarget &Subtarget,
4622 SelectionDAG &DAG) {
4623 unsigned NumElts = VT.getVectorNumElements();
4624 int NumSubElts, Index;
4625 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4626 Index))
4627 return SDValue();
4628
4629 bool OpsSwapped = Mask[Index] < (int)NumElts;
4630 SDValue InPlace = OpsSwapped ? V2 : V1;
4631 SDValue ToInsert = OpsSwapped ? V1 : V2;
4632
4633 MVT XLenVT = Subtarget.getXLenVT();
4634 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4635 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4636 // We slide up by the index that the subvector is being inserted at, and set
4637 // VL to the index + the number of elements being inserted.
4638 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4639 // If we're adding a suffix to the in place vector, i.e. inserting right
4640 // up to the very end of it, then we don't actually care about the tail.
4641 if (NumSubElts + Index >= (int)NumElts)
4642 Policy |= RISCVII::TAIL_AGNOSTIC;
4643
4644 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4645 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4646 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4647
4648 SDValue Res;
4649 // If we're inserting into the lowest elements, use a tail undisturbed
4650 // vmv.v.v.
4651 if (Index == 0)
4652 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4653 VL);
4654 else
4655 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4656 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4657 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4658}
4659
4660/// Match v(f)slide1up/down idioms. These operations involve sliding
4661/// N-1 elements to make room for an inserted scalar at one end.
4662 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4663 SDValue V1, SDValue V2,
4664 ArrayRef<int> Mask,
4665 const RISCVSubtarget &Subtarget,
4666 SelectionDAG &DAG) {
4667 bool OpsSwapped = false;
4668 if (!isa<BuildVectorSDNode>(V1)) {
4669 if (!isa<BuildVectorSDNode>(V2))
4670 return SDValue();
4671 std::swap(V1, V2);
4672 OpsSwapped = true;
4673 }
4674 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4675 if (!Splat)
4676 return SDValue();
4677
4678 // Return true if the mask could describe a slide of Mask.size() - 1
4679 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4680 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4681 const unsigned S = (Offset > 0) ? 0 : -Offset;
4682 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4683 for (unsigned i = S; i != E; ++i)
4684 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4685 return false;
4686 return true;
4687 };
4688
4689 const unsigned NumElts = VT.getVectorNumElements();
4690 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4691 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4692 return SDValue();
4693
4694 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4695 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4696 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4697 return SDValue();
4698
4699 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4700 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4701 auto OpCode = IsVSlidedown ?
4702 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL)
4703 : (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4704 if (!VT.isFloatingPoint())
4705 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4706 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4707 DAG.getUNDEF(ContainerVT),
4708 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4709 Splat, TrueMask, VL);
4710 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4711}
4712
4713// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4714// to create an interleaved vector of <[vscale x] n*2 x ty>.
4715// This requires that the size of ty is less than the subtarget's maximum ELEN.
4716 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4717 const SDLoc &DL, SelectionDAG &DAG,
4718 const RISCVSubtarget &Subtarget) {
4719 MVT VecVT = EvenV.getSimpleValueType();
4720 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4721 // Convert fixed vectors to scalable if needed
4722 if (VecContainerVT.isFixedLengthVector()) {
4723 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4724 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4725 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4726 }
4727
4728 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4729
4730 // We're working with a vector of the same size as the resulting
4731 // interleaved vector, but with half the number of elements and
4732 // twice the SEW (Hence the restriction on not using the maximum
4733 // ELEN)
4734 MVT WideVT =
4735 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4736 VecVT.getVectorElementCount());
4737 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4738 if (WideContainerVT.isFixedLengthVector())
4739 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4740
4741 // Bitcast the input vectors to integers in case they are FP
4742 VecContainerVT = VecContainerVT.changeTypeToInteger();
4743 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4744 OddV = DAG.getBitcast(VecContainerVT, OddV);
4745
4746 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4747 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4748
4749 SDValue Interleaved;
4750 if (OddV.isUndef()) {
4751 // If OddV is undef, this is a zero extend.
4752 // FIXME: Not only does this optimize the code, it fixes some correctness
4753 // issues because MIR does not have freeze.
4754 Interleaved =
4755 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4756 } else if (Subtarget.hasStdExtZvbb()) {
4757 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4758 SDValue OffsetVec =
4759 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4760 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4761 OffsetVec, Passthru, Mask, VL);
4762 if (!EvenV.isUndef())
4763 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4764 Interleaved, EvenV, Passthru, Mask, VL);
4765 } else if (EvenV.isUndef()) {
4766 Interleaved =
4767 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4768
4769 SDValue OffsetVec =
4770 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4771 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4772 Interleaved, OffsetVec, Passthru, Mask, VL);
4773 } else {
4774 // FIXME: We should freeze the odd vector here. We already handled the case
4775 // of provably undef/poison above.
4776
4777 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4778 // vwaddu.vv
4779 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4780 OddV, Passthru, Mask, VL);
4781
4782 // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. OddV * 0xff...ff
4783 SDValue AllOnesVec = DAG.getSplatVector(
4784 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4785 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4786 OddV, AllOnesVec, Passthru, Mask, VL);
4787
4788 // Add the two together so we get
4789 // (OddV * 0xff...ff) + (OddV + EvenV)
4790 // = (OddV * 0x100...00) + EvenV
4791 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4792 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4793 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4794 Interleaved, OddsMul, Passthru, Mask, VL);
4795 }
4796
4797 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4798 MVT ResultContainerVT = MVT::getVectorVT(
4799 VecVT.getVectorElementType(), // Make sure to use original type
4800 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4801 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4802
4803 // Convert back to a fixed vector if needed
4804 MVT ResultVT =
4805 MVT::getVectorVT(VecVT.getVectorElementType(),
4806 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4807 if (ResultVT.isFixedLengthVector())
4808 Interleaved =
4809 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4810
4811 return Interleaved;
4812}
4813
4814// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4815// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4816 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4817 SelectionDAG &DAG,
4818 const RISCVSubtarget &Subtarget) {
4819 SDLoc DL(SVN);
4820 MVT VT = SVN->getSimpleValueType(0);
4821 SDValue V = SVN->getOperand(0);
4822 unsigned NumElts = VT.getVectorNumElements();
4823
4824 assert(VT.getVectorElementType() == MVT::i1);
4825
4826 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4827 SVN->getMask().size()) ||
4828 !SVN->getOperand(1).isUndef())
4829 return SDValue();
4830
4831 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4832 EVT ViaVT = EVT::getVectorVT(
4833 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4834 EVT ViaBitVT =
4835 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4836
4837 // If we don't have zvbb or the larger element type > ELEN, the operation will
4838 // be illegal.
4839 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4840 ViaVT) ||
4841 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4842 return SDValue();
4843
4844 // If the bit vector doesn't fit exactly into the larger element type, we need
4845 // to insert it into the larger vector and then shift up the reversed bits
4846 // afterwards to get rid of the gap introduced.
4847 if (ViaEltSize > NumElts)
4848 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4849 V, DAG.getVectorIdxConstant(0, DL));
4850
4851 SDValue Res =
4852 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4853
4854 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4855 // element type.
4856 if (ViaEltSize > NumElts)
4857 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4858 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4859
4860 Res = DAG.getBitcast(ViaBitVT, Res);
4861
4862 if (ViaEltSize > NumElts)
4863 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4864 DAG.getVectorIdxConstant(0, DL));
4865 return Res;
4866}
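// Example (illustrative): reversing a v4i1 rounds ViaEltSize up to 8, so
// the mask is inserted into a v8i1, bitreversed as a v1i8, shifted right by
// 4 to remove the gap, and the low v4i1 is extracted again.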
4867
4868 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4869 SelectionDAG &DAG,
4870 const RISCVSubtarget &Subtarget,
4871 MVT &RotateVT, unsigned &RotateAmt) {
4872 SDLoc DL(SVN);
4873
4874 EVT VT = SVN->getValueType(0);
4875 unsigned NumElts = VT.getVectorNumElements();
4876 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4877 unsigned NumSubElts;
4878 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4879 NumElts, NumSubElts, RotateAmt))
4880 return false;
4881 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4882 NumElts / NumSubElts);
4883
4884 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4885 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4886}
4887
4888// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4889// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4890// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4891 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4892 SelectionDAG &DAG,
4893 const RISCVSubtarget &Subtarget) {
4894 SDLoc DL(SVN);
4895
4896 EVT VT = SVN->getValueType(0);
4897 unsigned RotateAmt;
4898 MVT RotateVT;
4899 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4900 return SDValue();
4901
4902 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4903
4904 SDValue Rotate;
4905 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4906 // so canonicalize to vrev8.
4907 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4908 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4909 else
4910 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4911 DAG.getConstant(RotateAmt, DL, RotateVT));
4912
4913 return DAG.getBitcast(VT, Rotate);
4914}
4915
4916// If compiling with an exactly known VLEN, see if we can split a
4917// shuffle on m2 or larger into a small number of m1 sized shuffles
4918 // which write each destination register exactly once.
4919 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4920 SelectionDAG &DAG,
4921 const RISCVSubtarget &Subtarget) {
4922 SDLoc DL(SVN);
4923 MVT VT = SVN->getSimpleValueType(0);
4924 SDValue V1 = SVN->getOperand(0);
4925 SDValue V2 = SVN->getOperand(1);
4926 ArrayRef<int> Mask = SVN->getMask();
4927 unsigned NumElts = VT.getVectorNumElements();
4928
4929 // If we don't know exact data layout, not much we can do. If this
4930 // is already m1 or smaller, no point in splitting further.
4931 const auto VLen = Subtarget.getRealVLen();
4932 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4933 return SDValue();
4934
4935 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4936 // expansion for.
4937 unsigned RotateAmt;
4938 MVT RotateVT;
4939 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4940 return SDValue();
4941
4942 MVT ElemVT = VT.getVectorElementType();
4943 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4944 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4945
4946 SmallVector<std::pair<int, SmallVector<int>>>
4947 OutMasks(VRegsPerSrc, {-1, {}});
4948
4949 // Check if our mask can be done as a 1-to-1 mapping from source
4950 // to destination registers in the group without needing to
4951 // write each destination more than once.
4952 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4953 int DstVecIdx = DstIdx / ElemsPerVReg;
4954 int DstSubIdx = DstIdx % ElemsPerVReg;
4955 int SrcIdx = Mask[DstIdx];
4956 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4957 continue;
4958 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4959 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4960 if (OutMasks[DstVecIdx].first == -1)
4961 OutMasks[DstVecIdx].first = SrcVecIdx;
4962 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4963 // Note: This case could easily be handled by keeping track of a chain
4964 // of source values and generating two element shuffles below. This is
4965 // less an implementation question, and more a profitability one.
4966 return SDValue();
4967
4968 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4969 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4970 }
4971
4972 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4973 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4974 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4975 assert(M1VT == getLMUL1VT(M1VT));
4976 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4977 SDValue Vec = DAG.getUNDEF(ContainerVT);
4978 // The following semantically builds up a fixed length concat_vector
4979 // of the component shuffle_vectors. We eagerly lower to scalable here
4980 // to avoid DAG combining it back to a large shuffle_vector again.
4981 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4982 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4983 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4984 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4985 if (SrcVecIdx == -1)
4986 continue;
4987 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4988 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4989 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4990 DAG.getVectorIdxConstant(ExtractIdx, DL));
4991 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4992 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4993 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4994 unsigned InsertIdx = DstVecIdx * NumOpElts;
4995 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4996 DAG.getVectorIdxConstant(InsertIdx, DL));
4997 }
4998 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4999}
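// Example (illustrative, assuming VLEN=128): an m2 v8i32 shuffle whose mask
// never pulls elements from two different source registers into the same
// destination register is rewritten as two independent v4i32 (m1) shuffles,
// one per destination register.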
5000
5001 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5002 const RISCVSubtarget &Subtarget) {
5003 SDValue V1 = Op.getOperand(0);
5004 SDValue V2 = Op.getOperand(1);
5005 SDLoc DL(Op);
5006 MVT XLenVT = Subtarget.getXLenVT();
5007 MVT VT = Op.getSimpleValueType();
5008 unsigned NumElts = VT.getVectorNumElements();
5009 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5010
5011 if (VT.getVectorElementType() == MVT::i1) {
5012 // Lower to a vror.vi of a larger element type if possible before we promote
5013 // i1s to i8s.
5014 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5015 return V;
5016 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5017 return V;
5018
5019 // Promote i1 shuffle to i8 shuffle.
5020 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5021 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5022 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5023 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5024 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5025 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5026 ISD::SETNE);
5027 }
5028
5029 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5030
5031 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5032
5033 if (SVN->isSplat()) {
5034 const int Lane = SVN->getSplatIndex();
5035 if (Lane >= 0) {
5036 MVT SVT = VT.getVectorElementType();
5037
5038 // Turn splatted vector load into a strided load with an X0 stride.
5039 SDValue V = V1;
5040 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5041 // with undef.
5042 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5043 int Offset = Lane;
5044 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5045 int OpElements =
5046 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5047 V = V.getOperand(Offset / OpElements);
5048 Offset %= OpElements;
5049 }
5050
5051 // We need to ensure the load isn't atomic or volatile.
5052 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5053 auto *Ld = cast<LoadSDNode>(V);
5054 Offset *= SVT.getStoreSize();
5055 SDValue NewAddr = DAG.getMemBasePlusOffset(
5056 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5057
5058 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5059 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5060 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5061 SDValue IntID =
5062 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5063 SDValue Ops[] = {Ld->getChain(),
5064 IntID,
5065 DAG.getUNDEF(ContainerVT),
5066 NewAddr,
5067 DAG.getRegister(RISCV::X0, XLenVT),
5068 VL};
5069 SDValue NewLoad = DAG.getMemIntrinsicNode(
5070 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5071 DAG.getMachineFunction().getMachineMemOperand(
5072 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5073 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5074 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5075 }
5076
5077 MVT SplatVT = ContainerVT;
5078
5079 // If we don't have Zfh, we need to use an integer scalar load.
5080 if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
5081 SVT = MVT::i16;
5082 SplatVT = ContainerVT.changeVectorElementType(SVT);
5083 }
5084
5085 // Otherwise use a scalar load and splat. This will give the best
5086 // opportunity to fold a splat into the operation. ISel can turn it into
5087 // the x0 strided load if we aren't able to fold away the select.
5088 if (SVT.isFloatingPoint())
5089 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5090 Ld->getPointerInfo().getWithOffset(Offset),
5091 Ld->getOriginalAlign(),
5092 Ld->getMemOperand()->getFlags());
5093 else
5094 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5095 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5096 Ld->getOriginalAlign(),
5097 Ld->getMemOperand()->getFlags());
5098 DAG.makeEquivalentMemoryOrdering(Ld, V);
5099
5100 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5101 : RISCVISD::VMV_V_X_VL;
5102 SDValue Splat =
5103 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5104 Splat = DAG.getBitcast(ContainerVT, Splat);
5105 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5106 }
5107
5108 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5109 assert(Lane < (int)NumElts && "Unexpected lane!");
5110 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5111 V1, DAG.getConstant(Lane, DL, XLenVT),
5112 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5113 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5114 }
5115 }
5116
5117 // For exact VLEN m2 or greater, try to split to m1 operations if we
5118 // can split cleanly.
5119 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5120 return V;
5121
5122 ArrayRef<int> Mask = SVN->getMask();
5123
5124 if (SDValue V =
5125 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5126 return V;
5127
5128 if (SDValue V =
5129 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5130 return V;
5131
5132 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5133 // available.
5134 if (Subtarget.hasStdExtZvkb())
5135 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5136 return V;
5137
5138 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5139 // be undef which can be handled with a single SLIDEDOWN/UP.
5140 int LoSrc, HiSrc;
5141 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5142 if (Rotation > 0) {
5143 SDValue LoV, HiV;
5144 if (LoSrc >= 0) {
5145 LoV = LoSrc == 0 ? V1 : V2;
5146 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5147 }
5148 if (HiSrc >= 0) {
5149 HiV = HiSrc == 0 ? V1 : V2;
5150 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5151 }
5152
5153 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5154 // to slide LoV up by (NumElts - Rotation).
5155 unsigned InvRotate = NumElts - Rotation;
5156
5157 SDValue Res = DAG.getUNDEF(ContainerVT);
5158 if (HiV) {
5159 // Even though we could use a smaller VL, don't, so we avoid a vsetivli
5160 // toggle.
5161 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5162 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5163 }
5164 if (LoV)
5165 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5166 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5167 RISCVII::TAIL_AGNOSTIC);
5168
5169 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5170 }
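// Illustrative example (not taken from the source comments): for a two-source
// v4i32 shuffle with mask <2, 3, 4, 5>, isElementRotate reports Rotation = 2
// with V1 as the HiSrc and V2 as the LoSrc. The lowering above slides V1 down
// by 2 (placing V1[2], V1[3] in the low lanes) and then slides V2 up by
// NumElts - Rotation = 2 on top of it, producing <V1[2], V1[3], V2[0], V2[1]>
// without needing a vrgather.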
5171
5172 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5173 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5174
5175 // If this is a deinterleave and we can widen the vector, then we can use
5176 // vnsrl to deinterleave.
5177 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5178 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5179 Subtarget, DAG);
5180 }
5181
5182 if (SDValue V =
5183 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5184 return V;
5185
5186 // Detect an interleave shuffle and lower to
5187 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5188 int EvenSrc, OddSrc;
5189 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5190 // Extract the halves of the vectors.
5191 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5192
5193 int Size = Mask.size();
5194 SDValue EvenV, OddV;
5195 assert(EvenSrc >= 0 && "Undef source?");
5196 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5197 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5198 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5199
5200 assert(OddSrc >= 0 && "Undef source?");
5201 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5202 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5203 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5204
5205 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5206 }
5207
5208
5209 // Handle any remaining single source shuffles
5210 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5211 if (V2.isUndef()) {
5212 // We might be able to express the shuffle as a bitrotate. But even if we
5213 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5214 // shifts and a vor will have a higher throughput than a vrgather.
5215 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5216 return V;
5217
5218 if (VT.getScalarSizeInBits() == 8 &&
5219 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5220 // On such a vector we're unable to use i8 as the index type.
5221 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5222 // may involve vector splitting if we're already at LMUL=8, or our
5223 // user-supplied maximum fixed-length LMUL.
5224 return SDValue();
5225 }
5226
5227 // Base case for the two operand recursion below - handle the worst case
5228 // single source shuffle.
5229 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5230 MVT IndexVT = VT.changeTypeToInteger();
5231 // Since we can't introduce illegal index types at this stage, use i16 and
5232 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5233 // than XLenVT.
5234 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5235 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5236 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5237 }
5238
5239 // If the mask allows, we can do all the index computation in 16 bits. This
5240 // requires less work and less register pressure at high LMUL, and creates
5241 // smaller constants which may be cheaper to materialize.
5242 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5243 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5244 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5245 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5246 }
5247
5248 MVT IndexContainerVT =
5249 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5250
5251 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5252 SmallVector<SDValue> GatherIndicesLHS;
5253 for (int MaskIndex : Mask) {
5254 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5255 GatherIndicesLHS.push_back(IsLHSIndex
5256 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5257 : DAG.getUNDEF(XLenVT));
5258 }
5259 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5260 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5261 Subtarget);
5262 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5263 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5264 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5265 }
5266
5267 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5268 // merged with a second vrgather.
5269 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5270
5271 // Now construct the mask that will be used by the blended vrgather operation.
5272 // Construct the appropriate indices into each vector.
5273 for (int MaskIndex : Mask) {
5274 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5275 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5276 ? MaskIndex : -1);
5277 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5278 }
5279
5280 // Try to pick a profitable operand order.
5281 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5282 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5283
5284 // Recursively invoke lowering for each operand if we had two
5285 // independent single source shuffles, and then combine the result via a
5286 // vselect. Note that the vselect will likely be folded back into the
5287 // second permute (vrgather, or other) by the post-isel combine.
5288 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5289 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5290
5291 SmallVector<SDValue> MaskVals;
5292 for (int MaskIndex : Mask) {
5293 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5294 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5295 }
5296
5297 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5298 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5299 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5300
5301 if (SwapOps)
5302 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5303 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5304}
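// Worked example of the two-operand fallback above: for a v4i32 shuffle of
// V1/V2 with mask <0, 5, 2, 7>, ShuffleMaskLHS becomes <0, -1, 2, -1> and
// ShuffleMaskRHS becomes <-1, 1, -1, 3>. Each half is lowered as a single
// source shuffle (typically one vrgather), and the two results are blended
// with a VSELECT whose i1 mask picks lanes 1 and 3 from the V2-derived value.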
5305
5306 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5307 // Support splats for any type. These should type legalize well.
5308 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5309 return true;
5310
5311 // Only support legal VTs for other shuffles for now.
5312 if (!isTypeLegal(VT))
5313 return false;
5314
5315 MVT SVT = VT.getSimpleVT();
5316
5317 // Not for i1 vectors.
5318 if (SVT.getScalarType() == MVT::i1)
5319 return false;
5320
5321 int Dummy1, Dummy2;
5322 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5323 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5324}
5325
5326// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5327// the exponent.
5328SDValue
5329RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5330 SelectionDAG &DAG) const {
5331 MVT VT = Op.getSimpleValueType();
5332 unsigned EltSize = VT.getScalarSizeInBits();
5333 SDValue Src = Op.getOperand(0);
5334 SDLoc DL(Op);
5335 MVT ContainerVT = VT;
5336
5337 SDValue Mask, VL;
5338 if (Op->isVPOpcode()) {
5339 Mask = Op.getOperand(1);
5340 if (VT.isFixedLengthVector())
5341 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5342 Subtarget);
5343 VL = Op.getOperand(2);
5344 }
5345
5346 // We choose an FP type that can represent the value if possible. Otherwise, we
5347 // use a round-toward-zero conversion so the exponent of the result stays correct.
5348 // TODO: Use f16 for i8 when possible?
5349 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5350 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5351 FloatEltVT = MVT::f32;
5352 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5353
5354 // Legal types should have been checked in the RISCVTargetLowering
5355 // constructor.
5356 // TODO: Splitting may make sense in some cases.
5357 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5358 "Expected legal float type!");
5359
5360 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5361 // The trailing zero count is equal to log2 of this single bit value.
5362 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5363 SDValue Neg = DAG.getNegative(Src, DL, VT);
5364 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5365 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5366 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5367 Src, Mask, VL);
5368 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5369 }
5370
5371 // We have a legal FP type, convert to it.
5372 SDValue FloatVal;
5373 if (FloatVT.bitsGT(VT)) {
5374 if (Op->isVPOpcode())
5375 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5376 else
5377 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5378 } else {
5379 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5380 if (VT.isFixedLengthVector()) {
5381 ContainerVT = getContainerForFixedLengthVector(VT);
5382 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5383 }
5384 if (!Op->isVPOpcode())
5385 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5386 SDValue RTZRM =
5387 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5388 MVT ContainerFloatVT =
5389 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5390 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5391 Src, Mask, RTZRM, VL);
5392 if (VT.isFixedLengthVector())
5393 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5394 }
5395 // Bitcast to integer and shift the exponent to the LSB.
5396 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5397 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5398 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5399
5400 SDValue Exp;
5401 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5402 if (Op->isVPOpcode()) {
5403 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5404 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5405 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5406 } else {
5407 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5408 DAG.getConstant(ShiftAmt, DL, IntVT));
5409 if (IntVT.bitsLT(VT))
5410 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5411 else if (IntVT.bitsGT(VT))
5412 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5413 }
5414
5415 // The exponent contains log2 of the value in biased form.
5416 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5417 // For trailing zeros, we just need to subtract the bias.
5418 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5419 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5420 DAG.getConstant(ExponentBias, DL, VT));
5421 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5422 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5423 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5424
5425 // For leading zeros, we need to remove the bias and convert from log2 to
5426 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5427 unsigned Adjust = ExponentBias + (EltSize - 1);
5428 SDValue Res;
5429 if (Op->isVPOpcode())
5430 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5431 Mask, VL);
5432 else
5433 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5434
5435 // The above result with zero input equals to Adjust which is greater than
5436 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5437 if (Op.getOpcode() == ISD::CTLZ)
5438 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5439 else if (Op.getOpcode() == ISD::VP_CTLZ)
5440 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5441 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5442 return Res;
5443}
5444
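// Worked example for the exponent trick above, assuming i32 elements and a
// legal f64 vector type: for x = 40, CTTZ_ZERO_UNDEF isolates the lowest set
// bit (40 & -40 = 8); uitofp(8.0) has biased exponent 1026, and
// 1026 - 1023 = 3 trailing zeros. For CTLZ, uitofp(40.0) has biased exponent
// 1028 and Adjust = 1023 + 31 = 1054, so 1054 - 1028 = 26 leading zeros.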
5445SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5446 SelectionDAG &DAG) const {
5447 SDLoc DL(Op);
5448 MVT XLenVT = Subtarget.getXLenVT();
5449 SDValue Source = Op->getOperand(0);
5450 MVT SrcVT = Source.getSimpleValueType();
5451 SDValue Mask = Op->getOperand(1);
5452 SDValue EVL = Op->getOperand(2);
5453
5454 if (SrcVT.isFixedLengthVector()) {
5455 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5456 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5457 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5458 Subtarget);
5459 SrcVT = ContainerVT;
5460 }
5461
5462 // Convert to boolean vector.
5463 if (SrcVT.getScalarType() != MVT::i1) {
5464 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5465 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5466 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5467 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5468 DAG.getUNDEF(SrcVT), Mask, EVL});
5469 }
5470
5471 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5472 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5473 // In this case, we can interpret poison as -1, so nothing to do further.
5474 return Res;
5475
5476 // Convert -1 to VL.
5477 SDValue SetCC =
5478 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5479 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5480 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5481}
5482
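// Note on the -1 handling above: vfirst.m returns the index of the first set
// mask bit, or -1 when no bit below VL is set. For VP_CTTZ_ELTS that -1 must
// become EVL (e.g. with EVL = 8 and no non-zero active element, the
// setcc/select turns -1 into 8), while the ZERO_UNDEF form may leave it as
// poison and return the vfirst result directly.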
5483// While RVV has alignment restrictions, we should always be able to load as a
5484// legal equivalently-sized byte-typed vector instead. This method is
5485 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5486// the load is already correctly-aligned, it returns SDValue().
5487SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5488 SelectionDAG &DAG) const {
5489 auto *Load = cast<LoadSDNode>(Op);
5490 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5491
5492 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5493 Load->getMemoryVT(),
5494 *Load->getMemOperand()))
5495 return SDValue();
5496
5497 SDLoc DL(Op);
5498 MVT VT = Op.getSimpleValueType();
5499 unsigned EltSizeBits = VT.getScalarSizeInBits();
5500 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5501 "Unexpected unaligned RVV load type");
5502 MVT NewVT =
5503 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5504 assert(NewVT.isValid() &&
5505 "Expecting equally-sized RVV vector types to be legal");
5506 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5507 Load->getPointerInfo(), Load->getOriginalAlign(),
5508 Load->getMemOperand()->getFlags());
5509 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5510}
5511
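// Example of the retyping above: a v4i32 load with alignment 1 is re-expressed
// as a v16i8 load (4 elements * 4 bytes) of the same address and then bitcast
// back to v4i32; byte-element accesses have no alignment restriction in RVV.
// The unaligned-store path below applies the same transformation in reverse.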
5512// While RVV has alignment restrictions, we should always be able to store as a
5513// legal equivalently-sized byte-typed vector instead. This method is
5514 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5515// returns SDValue() if the store is already correctly aligned.
5516SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5517 SelectionDAG &DAG) const {
5518 auto *Store = cast<StoreSDNode>(Op);
5519 assert(Store && Store->getValue().getValueType().isVector() &&
5520 "Expected vector store");
5521
5522 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5523 Store->getMemoryVT(),
5524 *Store->getMemOperand()))
5525 return SDValue();
5526
5527 SDLoc DL(Op);
5528 SDValue StoredVal = Store->getValue();
5529 MVT VT = StoredVal.getSimpleValueType();
5530 unsigned EltSizeBits = VT.getScalarSizeInBits();
5531 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5532 "Unexpected unaligned RVV store type");
5533 MVT NewVT =
5534 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5535 assert(NewVT.isValid() &&
5536 "Expecting equally-sized RVV vector types to be legal");
5537 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5538 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5539 Store->getPointerInfo(), Store->getOriginalAlign(),
5540 Store->getMemOperand()->getFlags());
5541}
5542
5543 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5544 const RISCVSubtarget &Subtarget) {
5545 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5546
5547 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5548
5549 // All simm32 constants should be handled by isel.
5550 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5551 // this check redundant, but small immediates are common so this check
5552 // should have better compile time.
5553 if (isInt<32>(Imm))
5554 return Op;
5555
5556 // We only need to cost the immediate, if constant pool lowering is enabled.
5557 if (!Subtarget.useConstantPoolForLargeInts())
5558 return Op;
5559
5560 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5561 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5562 return Op;
5563
5564 // Optimizations below are disabled for opt size. If we're optimizing for
5565 // size, use a constant pool.
5566 if (DAG.shouldOptForSize())
5567 return SDValue();
5568
5569 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
5570 // that if it will avoid a constant pool.
5571 // It will require an extra temporary register though.
5572 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5573 // low and high 32 bits are the same and bit 31 and 63 are set.
5574 unsigned ShiftAmt, AddOpc;
5575 RISCVMatInt::InstSeq SeqLo =
5576 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5577 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5578 return Op;
5579
5580 return SDValue();
5581}
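// Illustrative case for the two-register path above: Imm = 0x1234567812345678
// has identical low and high 32-bit halves, so it can be built as
// X = 0x12345678 (lui+addi) followed by add(slli(X, 32), X) -- four
// instructions and no constant pool -- which is the kind of sequence
// generateTwoRegInstSeq is queried for here.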
5582
5583 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5584 const RISCVSubtarget &Subtarget) {
5585 SDLoc dl(Op);
5586 AtomicOrdering FenceOrdering =
5587 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5588 SyncScope::ID FenceSSID =
5589 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5590
5591 if (Subtarget.hasStdExtZtso()) {
5592 // The only fence that needs an instruction is a sequentially-consistent
5593 // cross-thread fence.
5594 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5595 FenceSSID == SyncScope::System)
5596 return Op;
5597
5598 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5599 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5600 }
5601
5602 // singlethread fences only synchronize with signal handlers on the same
5603 // thread and thus only need to preserve instruction order, not actually
5604 // enforce memory ordering.
5605 if (FenceSSID == SyncScope::SingleThread)
5606 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5607 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5608
5609 return Op;
5610}
5611
5612SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5613 SelectionDAG &DAG) const {
5614 SDLoc DL(Op);
5615 MVT VT = Op.getSimpleValueType();
5616 MVT XLenVT = Subtarget.getXLenVT();
5617 unsigned Check = Op.getConstantOperandVal(1);
5618 unsigned TDCMask = 0;
5619 if (Check & fcSNan)
5620 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5621 if (Check & fcQNan)
5622 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5623 if (Check & fcPosInf)
5624 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5625 if (Check & fcNegInf)
5626 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5627 if (Check & fcPosNormal)
5628 TDCMask |= RISCV::FPMASK_Positive_Normal;
5629 if (Check & fcNegNormal)
5630 TDCMask |= RISCV::FPMASK_Negative_Normal;
5631 if (Check & fcPosSubnormal)
5632 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5633 if (Check & fcNegSubnormal)
5634 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5635 if (Check & fcPosZero)
5636 TDCMask |= RISCV::FPMASK_Positive_Zero;
5637 if (Check & fcNegZero)
5638 TDCMask |= RISCV::FPMASK_Negative_Zero;
5639
5640 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5641
5642 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5643
5644 if (VT.isVector()) {
5645 SDValue Op0 = Op.getOperand(0);
5646 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5647
5648 if (VT.isScalableVector()) {
5649 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5650 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5651 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5652 Mask = Op.getOperand(2);
5653 VL = Op.getOperand(3);
5654 }
5655 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5656 VL, Op->getFlags());
5657 if (IsOneBitMask)
5658 return DAG.getSetCC(DL, VT, FPCLASS,
5659 DAG.getConstant(TDCMask, DL, DstVT),
5660 ISD::SETEQ);
5661 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5662 DAG.getConstant(TDCMask, DL, DstVT));
5663 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5664 ISD::SETNE);
5665 }
5666
5667 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5668 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5669 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5670 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5671 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5672 Mask = Op.getOperand(2);
5673 MVT MaskContainerVT =
5674 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5675 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5676 VL = Op.getOperand(3);
5677 }
5678 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5679
5680 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5681 Mask, VL, Op->getFlags());
5682
5683 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5684 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5685 if (IsOneBitMask) {
5686 SDValue VMSEQ =
5687 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5688 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5689 DAG.getUNDEF(ContainerVT), Mask, VL});
5690 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5691 }
5692 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5693 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5694
5695 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5696 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5697 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5698
5699 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5700 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5701 DAG.getUNDEF(ContainerVT), Mask, VL});
5702 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5703 }
5704
5705 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5706 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5707 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5708 ISD::SETNE);
5709 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5710}
5711
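// Example for the mask construction above: is_fpclass(x, fcNan) sets both
// FPMASK_Signaling_NaN and FPMASK_Quiet_NaN (bits 8 and 9 of the fclass
// result encoding), so IsOneBitMask is false and the vector paths fall back
// to the and + setcc-ne-0 form; a single-category query such as fcPosInf
// keeps one bit set and can compare the fclass result for equality instead.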
5712// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5713// operations propagate nans.
5714 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5715 const RISCVSubtarget &Subtarget) {
5716 SDLoc DL(Op);
5717 MVT VT = Op.getSimpleValueType();
5718
5719 SDValue X = Op.getOperand(0);
5720 SDValue Y = Op.getOperand(1);
5721
5722 if (!VT.isVector()) {
5723 MVT XLenVT = Subtarget.getXLenVT();
5724
5725 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5726 // ensures that when one input is a nan, the other will also be a nan
5727 // allowing the nan to propagate. If both inputs are nan, this will swap the
5728 // inputs which is harmless.
5729
5730 SDValue NewY = Y;
5731 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5732 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5733 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5734 }
5735
5736 SDValue NewX = X;
5737 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5738 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5739 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5740 }
5741
5742 unsigned Opc =
5743 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5744 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5745 }
5746
5747 // Check for NaN inputs before converting fixed-length vectors to scalable form.
5748 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5749 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5750
5751 MVT ContainerVT = VT;
5752 if (VT.isFixedLengthVector()) {
5753 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5754 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5755 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5756 }
5757
5758 SDValue Mask, VL;
5759 if (Op->isVPOpcode()) {
5760 Mask = Op.getOperand(2);
5761 if (VT.isFixedLengthVector())
5762 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5763 Subtarget);
5764 VL = Op.getOperand(3);
5765 } else {
5766 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5767 }
5768
5769 SDValue NewY = Y;
5770 if (!XIsNeverNan) {
5771 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5772 {X, X, DAG.getCondCode(ISD::SETOEQ),
5773 DAG.getUNDEF(ContainerVT), Mask, VL});
5774 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5775 DAG.getUNDEF(ContainerVT), VL);
5776 }
5777
5778 SDValue NewX = X;
5779 if (!YIsNeverNan) {
5780 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5781 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5782 DAG.getUNDEF(ContainerVT), Mask, VL});
5783 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5784 DAG.getUNDEF(ContainerVT), VL);
5785 }
5786
5787 unsigned Opc =
5788 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5789 ? RISCVISD::VFMAX_VL
5790 : RISCVISD::VFMIN_VL;
5791 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5792 DAG.getUNDEF(ContainerVT), Mask, VL);
5793 if (VT.isFixedLengthVector())
5794 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5795 return Res;
5796}
5797
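// Worked example of the NaN handling above: for FMAXIMUM(x, y) with x = NaN
// and y known non-NaN, the x==x (SETOEQ) compare is false, so NewY becomes x
// (NaN) while NewX stays x; fmax/vfmax of two NaNs yields a NaN, so the NaN
// propagates as fmaximum requires. When neither input is a NaN, NewX/NewY are
// simply x/y and the ordinary min/max result is produced.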
5798/// Get a RISC-V target specified VL op for a given SDNode.
5799static unsigned getRISCVVLOp(SDValue Op) {
5800#define OP_CASE(NODE) \
5801 case ISD::NODE: \
5802 return RISCVISD::NODE##_VL;
5803#define VP_CASE(NODE) \
5804 case ISD::VP_##NODE: \
5805 return RISCVISD::NODE##_VL;
5806 // clang-format off
5807 switch (Op.getOpcode()) {
5808 default:
5809 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5810 OP_CASE(ADD)
5811 OP_CASE(SUB)
5812 OP_CASE(MUL)
5813 OP_CASE(MULHS)
5814 OP_CASE(MULHU)
5815 OP_CASE(SDIV)
5816 OP_CASE(SREM)
5817 OP_CASE(UDIV)
5818 OP_CASE(UREM)
5819 OP_CASE(SHL)
5820 OP_CASE(SRA)
5821 OP_CASE(SRL)
5822 OP_CASE(ROTL)
5823 OP_CASE(ROTR)
5824 OP_CASE(BSWAP)
5825 OP_CASE(CTTZ)
5826 OP_CASE(CTLZ)
5827 OP_CASE(CTPOP)
5828 OP_CASE(BITREVERSE)
5829 OP_CASE(SADDSAT)
5830 OP_CASE(UADDSAT)
5831 OP_CASE(SSUBSAT)
5832 OP_CASE(USUBSAT)
5833 OP_CASE(AVGFLOORS)
5834 OP_CASE(AVGFLOORU)
5835 OP_CASE(AVGCEILS)
5836 OP_CASE(AVGCEILU)
5837 OP_CASE(FADD)
5838 OP_CASE(FSUB)
5839 OP_CASE(FMUL)
5840 OP_CASE(FDIV)
5841 OP_CASE(FNEG)
5842 OP_CASE(FABS)
5843 OP_CASE(FSQRT)
5844 OP_CASE(SMIN)
5845 OP_CASE(SMAX)
5846 OP_CASE(UMIN)
5847 OP_CASE(UMAX)
5848 OP_CASE(STRICT_FADD)
5849 OP_CASE(STRICT_FSUB)
5850 OP_CASE(STRICT_FMUL)
5851 OP_CASE(STRICT_FDIV)
5852 OP_CASE(STRICT_FSQRT)
5853 VP_CASE(ADD) // VP_ADD
5854 VP_CASE(SUB) // VP_SUB
5855 VP_CASE(MUL) // VP_MUL
5856 VP_CASE(SDIV) // VP_SDIV
5857 VP_CASE(SREM) // VP_SREM
5858 VP_CASE(UDIV) // VP_UDIV
5859 VP_CASE(UREM) // VP_UREM
5860 VP_CASE(SHL) // VP_SHL
5861 VP_CASE(FADD) // VP_FADD
5862 VP_CASE(FSUB) // VP_FSUB
5863 VP_CASE(FMUL) // VP_FMUL
5864 VP_CASE(FDIV) // VP_FDIV
5865 VP_CASE(FNEG) // VP_FNEG
5866 VP_CASE(FABS) // VP_FABS
5867 VP_CASE(SMIN) // VP_SMIN
5868 VP_CASE(SMAX) // VP_SMAX
5869 VP_CASE(UMIN) // VP_UMIN
5870 VP_CASE(UMAX) // VP_UMAX
5871 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5872 VP_CASE(SETCC) // VP_SETCC
5873 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5874 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5875 VP_CASE(BITREVERSE) // VP_BITREVERSE
5876 VP_CASE(SADDSAT) // VP_SADDSAT
5877 VP_CASE(UADDSAT) // VP_UADDSAT
5878 VP_CASE(SSUBSAT) // VP_SSUBSAT
5879 VP_CASE(USUBSAT) // VP_USUBSAT
5880 VP_CASE(BSWAP) // VP_BSWAP
5881 VP_CASE(CTLZ) // VP_CTLZ
5882 VP_CASE(CTTZ) // VP_CTTZ
5883 VP_CASE(CTPOP) // VP_CTPOP
5884 case ISD::CTLZ_ZERO_UNDEF:
5885 case ISD::VP_CTLZ_ZERO_UNDEF:
5886 return RISCVISD::CTLZ_VL;
5887 case ISD::CTTZ_ZERO_UNDEF:
5888 case ISD::VP_CTTZ_ZERO_UNDEF:
5889 return RISCVISD::CTTZ_VL;
5890 case ISD::FMA:
5891 case ISD::VP_FMA:
5892 return RISCVISD::VFMADD_VL;
5893 case ISD::STRICT_FMA:
5894 return RISCVISD::STRICT_VFMADD_VL;
5895 case ISD::AND:
5896 case ISD::VP_AND:
5897 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5898 return RISCVISD::VMAND_VL;
5899 return RISCVISD::AND_VL;
5900 case ISD::OR:
5901 case ISD::VP_OR:
5902 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5903 return RISCVISD::VMOR_VL;
5904 return RISCVISD::OR_VL;
5905 case ISD::XOR:
5906 case ISD::VP_XOR:
5907 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5908 return RISCVISD::VMXOR_VL;
5909 return RISCVISD::XOR_VL;
5910 case ISD::VP_SELECT:
5911 case ISD::VP_MERGE:
5912 return RISCVISD::VMERGE_VL;
5913 case ISD::VP_SRA:
5914 return RISCVISD::SRA_VL;
5915 case ISD::VP_SRL:
5916 return RISCVISD::SRL_VL;
5917 case ISD::VP_SQRT:
5918 return RISCVISD::FSQRT_VL;
5919 case ISD::VP_SIGN_EXTEND:
5920 return RISCVISD::VSEXT_VL;
5921 case ISD::VP_ZERO_EXTEND:
5922 return RISCVISD::VZEXT_VL;
5923 case ISD::VP_FP_TO_SINT:
5924 return RISCVISD::VFCVT_RTZ_X_F_VL;
5925 case ISD::VP_FP_TO_UINT:
5926 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5927 case ISD::FMINNUM:
5928 case ISD::VP_FMINNUM:
5929 return RISCVISD::VFMIN_VL;
5930 case ISD::FMAXNUM:
5931 case ISD::VP_FMAXNUM:
5932 return RISCVISD::VFMAX_VL;
5933 case ISD::LRINT:
5934 case ISD::VP_LRINT:
5935 case ISD::LLRINT:
5936 case ISD::VP_LLRINT:
5937 return RISCVISD::VFCVT_X_F_VL;
5938 }
5939 // clang-format on
5940#undef OP_CASE
5941#undef VP_CASE
5942}
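// For reference, OP_CASE(ADD) above expands to
//   case ISD::ADD: return RISCVISD::ADD_VL;
// and VP_CASE(ADD) to
//   case ISD::VP_ADD: return RISCVISD::ADD_VL;
// so a plain node and its VP form map to the same masked VL opcode.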
5943
5944/// Return true if a RISC-V target specified op has a passthru operand.
5945static bool hasPassthruOp(unsigned Opcode) {
5946 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5948 "not a RISC-V target specific op");
5950 130 &&
5953 21 &&
5954 "adding target specific op should update this function");
5955 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5956 return true;
5957 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5958 return true;
5959 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5960 return true;
5961 if (Opcode == RISCVISD::SETCC_VL)
5962 return true;
5963 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5964 return true;
5965 if (Opcode == RISCVISD::VMERGE_VL)
5966 return true;
5967 return false;
5968}
5969
5970/// Return true if a RISC-V target specified op has a mask operand.
5971static bool hasMaskOp(unsigned Opcode) {
5972 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5974 "not a RISC-V target specific op");
5976 130 &&
5979 21 &&
5980 "adding target specific op should update this function");
5981 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5982 return true;
5983 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5984 return true;
5985 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5986 Opcode <= RISCVISD::STRICT_VFROUND_NOEXC_VL)
5987 return true;
5988 return false;
5989}
5990
5991 SDValue RISCVTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
5992 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5993 SDLoc DL(Op);
5994
5995 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5996 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5997
5998 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5999 if (!Op.getOperand(j).getValueType().isVector()) {
6000 LoOperands[j] = Op.getOperand(j);
6001 HiOperands[j] = Op.getOperand(j);
6002 continue;
6003 }
6004 std::tie(LoOperands[j], HiOperands[j]) =
6005 DAG.SplitVector(Op.getOperand(j), DL);
6006 }
6007
6008 SDValue LoRes =
6009 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6010 SDValue HiRes =
6011 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6012
6013 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6014}
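// Example of the generic split above: an nxv16f32 FADD is split by
// GetSplitDestVTs into two nxv8f32 halves; each vector operand is split with
// SplitVector, non-vector operands are duplicated, two half-width FADDs are
// built, and the halves are rejoined with CONCAT_VECTORS.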
6015
6016 SDValue RISCVTargetLowering::SplitVPOp(SDValue Op, SelectionDAG &DAG) const {
6017 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6018 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6019 SDLoc DL(Op);
6020
6021 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6022 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6023
6024 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6025 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6026 std::tie(LoOperands[j], HiOperands[j]) =
6027 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6028 continue;
6029 }
6030 if (!Op.getOperand(j).getValueType().isVector()) {
6031 LoOperands[j] = Op.getOperand(j);
6032 HiOperands[j] = Op.getOperand(j);
6033 continue;
6034 }
6035 std::tie(LoOperands[j], HiOperands[j]) =
6036 DAG.SplitVector(Op.getOperand(j), DL);
6037 }
6038
6039 SDValue LoRes =
6040 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6041 SDValue HiRes =
6042 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6043
6044 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6045}
6046
6047 SDValue RISCVTargetLowering::SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) const {
6048 SDLoc DL(Op);
6049
6050 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6051 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6052 auto [EVLLo, EVLHi] =
6053 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6054
6055 SDValue ResLo =
6056 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6057 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6058 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6059 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6060}
6061
6062 SDValue RISCVTargetLowering::SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) const {
6063
6064 assert(Op->isStrictFPOpcode());
6065
6066 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6067
6068 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6069 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6070
6071 SDLoc DL(Op);
6072
6073 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6074 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6075
6076 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6077 if (!Op.getOperand(j).getValueType().isVector()) {
6078 LoOperands[j] = Op.getOperand(j);
6079 HiOperands[j] = Op.getOperand(j);
6080 continue;
6081 }
6082 std::tie(LoOperands[j], HiOperands[j]) =
6083 DAG.SplitVector(Op.getOperand(j), DL);
6084 }
6085
6086 SDValue LoRes =
6087 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6088 HiOperands[0] = LoRes.getValue(1);
6089 SDValue HiRes =
6090 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6091
6092 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6093 LoRes.getValue(0), HiRes.getValue(0));
6094 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6095}
6096
6097 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6098 SelectionDAG &DAG) const {
6099 switch (Op.getOpcode()) {
6100 default:
6101 report_fatal_error("unimplemented operand");
6102 case ISD::ATOMIC_FENCE:
6103 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6104 case ISD::GlobalAddress:
6105 return lowerGlobalAddress(Op, DAG);
6106 case ISD::BlockAddress:
6107 return lowerBlockAddress(Op, DAG);
6108 case ISD::ConstantPool:
6109 return lowerConstantPool(Op, DAG);
6110 case ISD::JumpTable:
6111 return lowerJumpTable(Op, DAG);
6112 case ISD::GlobalTLSAddress:
6113 return lowerGlobalTLSAddress(Op, DAG);
6114 case ISD::Constant:
6115 return lowerConstant(Op, DAG, Subtarget);
6116 case ISD::SELECT:
6117 return lowerSELECT(Op, DAG);
6118 case ISD::BRCOND:
6119 return lowerBRCOND(Op, DAG);
6120 case ISD::VASTART:
6121 return lowerVASTART(Op, DAG);
6122 case ISD::FRAMEADDR:
6123 return lowerFRAMEADDR(Op, DAG);
6124 case ISD::RETURNADDR:
6125 return lowerRETURNADDR(Op, DAG);
6126 case ISD::SHL_PARTS:
6127 return lowerShiftLeftParts(Op, DAG);
6128 case ISD::SRA_PARTS:
6129 return lowerShiftRightParts(Op, DAG, true);
6130 case ISD::SRL_PARTS:
6131 return lowerShiftRightParts(Op, DAG, false);
6132 case ISD::ROTL:
6133 case ISD::ROTR:
6134 if (Op.getValueType().isFixedLengthVector()) {
6135 assert(Subtarget.hasStdExtZvkb());
6136 return lowerToScalableOp(Op, DAG);
6137 }
6138 assert(Subtarget.hasVendorXTHeadBb() &&
6139 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6140 "Unexpected custom legalization");
6141 // XTHeadBb only supports rotate by constant.
6142 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6143 return SDValue();
6144 return Op;
6145 case ISD::BITCAST: {
6146 SDLoc DL(Op);
6147 EVT VT = Op.getValueType();
6148 SDValue Op0 = Op.getOperand(0);
6149 EVT Op0VT = Op0.getValueType();
6150 MVT XLenVT = Subtarget.getXLenVT();
6151 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6152 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6153 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6154 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6155 return FPConv;
6156 }
6157 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6158 Subtarget.hasStdExtZfbfmin()) {
6159 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6160 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6161 return FPConv;
6162 }
6163 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6164 Subtarget.hasStdExtFOrZfinx()) {
6165 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6166 SDValue FPConv =
6167 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6168 return FPConv;
6169 }
6170 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6171 SDValue Lo, Hi;
6172 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6173 SDValue RetReg =
6174 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6175 return RetReg;
6176 }
6177
6178 // Consider other scalar<->scalar casts as legal if the types are legal.
6179 // Otherwise expand them.
6180 if (!VT.isVector() && !Op0VT.isVector()) {
6181 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6182 return Op;
6183 return SDValue();
6184 }
6185
6186 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6187 "Unexpected types");
6188
6189 if (VT.isFixedLengthVector()) {
6190 // We can handle fixed length vector bitcasts with a simple replacement
6191 // in isel.
6192 if (Op0VT.isFixedLengthVector())
6193 return Op;
6194 // When bitcasting from scalar to fixed-length vector, insert the scalar
6195 // into a one-element vector of the result type, and perform a vector
6196 // bitcast.
6197 if (!Op0VT.isVector()) {
6198 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6199 if (!isTypeLegal(BVT))
6200 return SDValue();
6201 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6202 DAG.getUNDEF(BVT), Op0,
6203 DAG.getVectorIdxConstant(0, DL)));
6204 }
6205 return SDValue();
6206 }
6207 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6208 // thus: bitcast the vector to a one-element vector type whose element type
6209 // is the same as the result type, and extract the first element.
6210 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6211 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6212 if (!isTypeLegal(BVT))
6213 return SDValue();
6214 SDValue BVec = DAG.getBitcast(BVT, Op0);
6215 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6216 DAG.getVectorIdxConstant(0, DL));
6217 }
6218 return SDValue();
6219 }
6220 case ISD::INTRINSIC_WO_CHAIN:
6221 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6222 case ISD::INTRINSIC_W_CHAIN:
6223 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6224 case ISD::INTRINSIC_VOID:
6225 return LowerINTRINSIC_VOID(Op, DAG);
6226 case ISD::IS_FPCLASS:
6227 return LowerIS_FPCLASS(Op, DAG);
6228 case ISD::BITREVERSE: {
6229 MVT VT = Op.getSimpleValueType();
6230 if (VT.isFixedLengthVector()) {
6231 assert(Subtarget.hasStdExtZvbb());
6232 return lowerToScalableOp(Op, DAG);
6233 }
6234 SDLoc DL(Op);
6235 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6236 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6237 // Expand bitreverse to a bswap(rev8) followed by brev8.
6238 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6239 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6240 }
6241 case ISD::TRUNCATE:
6242 case ISD::TRUNCATE_SSAT_S:
6243 case ISD::TRUNCATE_USAT_U:
6244 // Only custom-lower vector truncates
6245 if (!Op.getSimpleValueType().isVector())
6246 return Op;
6247 return lowerVectorTruncLike(Op, DAG);
6248 case ISD::ANY_EXTEND:
6249 case ISD::ZERO_EXTEND:
6250 if (Op.getOperand(0).getValueType().isVector() &&
6251 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6252 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6253 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6254 case ISD::SIGN_EXTEND:
6255 if (Op.getOperand(0).getValueType().isVector() &&
6256 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6257 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6258 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6259 case ISD::SPLAT_VECTOR_PARTS:
6260 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6261 case ISD::INSERT_VECTOR_ELT:
6262 return lowerINSERT_VECTOR_ELT(Op, DAG);
6263 case ISD::EXTRACT_VECTOR_ELT:
6264 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6265 case ISD::SCALAR_TO_VECTOR: {
6266 MVT VT = Op.getSimpleValueType();
6267 SDLoc DL(Op);
6268 SDValue Scalar = Op.getOperand(0);
6269 if (VT.getVectorElementType() == MVT::i1) {
6270 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6271 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6272 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6273 }
6274 MVT ContainerVT = VT;
6275 if (VT.isFixedLengthVector())
6276 ContainerVT = getContainerForFixedLengthVector(VT);
6277 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6278 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6279 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6280 DAG.getUNDEF(ContainerVT), Scalar, VL);
6281 if (VT.isFixedLengthVector())
6282 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6283 return V;
6284 }
6285 case ISD::VSCALE: {
6286 MVT XLenVT = Subtarget.getXLenVT();
6287 MVT VT = Op.getSimpleValueType();
6288 SDLoc DL(Op);
6289 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6290 // We define our scalable vector types for lmul=1 to use a 64 bit known
6291 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6292 // vscale as VLENB / 8.
6293 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6294 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6295 report_fatal_error("Support for VLEN==32 is incomplete.");
6296 // We assume VLENB is a multiple of 8. We manually choose the best shift
6297 // here because SimplifyDemandedBits isn't always able to simplify it.
6298 uint64_t Val = Op.getConstantOperandVal(0);
6299 if (isPowerOf2_64(Val)) {
6300 uint64_t Log2 = Log2_64(Val);
6301 if (Log2 < 3)
6302 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6303 DAG.getConstant(3 - Log2, DL, VT));
6304 else if (Log2 > 3)
6305 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6306 DAG.getConstant(Log2 - 3, DL, XLenVT));
6307 } else if ((Val % 8) == 0) {
6308 // If the multiplier is a multiple of 8, scale it down to avoid needing
6309 // to shift the VLENB value.
6310 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6311 DAG.getConstant(Val / 8, DL, XLenVT));
6312 } else {
6313 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6314 DAG.getConstant(3, DL, XLenVT));
6315 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6316 DAG.getConstant(Val, DL, XLenVT));
6317 }
6318 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6319 }
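// Example for the shift selection above: vscale is VLENB / 8, so a request
// for vscale * 2 becomes a single srli of VLENB by 2 (3 - log2(2) = 2), while
// vscale * 24 takes the multiple-of-8 path and is emitted as VLENB * 3,
// avoiding any shift of the VLENB value.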
6320 case ISD::FPOWI: {
6321 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6322 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6323 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6324 Op.getOperand(1).getValueType() == MVT::i32) {
6325 SDLoc DL(Op);
6326 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6327 SDValue Powi =
6328 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6329 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6330 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6331 }
6332 return SDValue();
6333 }
6334 case ISD::FMAXIMUM:
6335 case ISD::FMINIMUM:
6336 if (Op.getValueType() == MVT::nxv32f16 &&
6337 (Subtarget.hasVInstructionsF16Minimal() &&
6338 !Subtarget.hasVInstructionsF16()))
6339 return SplitVectorOp(Op, DAG);
6340 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6341 case ISD::FP_EXTEND: {
6342 SDLoc DL(Op);
6343 EVT VT = Op.getValueType();
6344 SDValue Op0 = Op.getOperand(0);
6345 EVT Op0VT = Op0.getValueType();
6346 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6347 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6348 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6349 SDValue FloatVal =
6350 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6351 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6352 }
6353
6354 if (!Op.getValueType().isVector())
6355 return Op;
6356 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6357 }
6358 case ISD::FP_ROUND: {
6359 SDLoc DL(Op);
6360 EVT VT = Op.getValueType();
6361 SDValue Op0 = Op.getOperand(0);
6362 EVT Op0VT = Op0.getValueType();
6363 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6364 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6365 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6366 Subtarget.hasStdExtDOrZdinx()) {
6367 SDValue FloatVal =
6368 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6369 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6370 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6371 }
6372
6373 if (!Op.getValueType().isVector())
6374 return Op;
6375 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6376 }
6377 case ISD::STRICT_FP_ROUND:
6378 case ISD::STRICT_FP_EXTEND:
6379 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6380 case ISD::SINT_TO_FP:
6381 case ISD::UINT_TO_FP:
6382 if (Op.getValueType().isVector() &&
6383 Op.getValueType().getScalarType() == MVT::f16 &&
6384 (Subtarget.hasVInstructionsF16Minimal() &&
6385 !Subtarget.hasVInstructionsF16())) {
6386 if (Op.getValueType() == MVT::nxv32f16)
6387 return SplitVectorOp(Op, DAG);
6388 // int -> f32
6389 SDLoc DL(Op);
6390 MVT NVT =
6391 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6392 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6393 // f32 -> f16
6394 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6395 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6396 }
6397 [[fallthrough]];
6398 case ISD::FP_TO_SINT:
6399 case ISD::FP_TO_UINT:
6400 if (SDValue Op1 = Op.getOperand(0);
6401 Op1.getValueType().isVector() &&
6402 Op1.getValueType().getScalarType() == MVT::f16 &&
6403 (Subtarget.hasVInstructionsF16Minimal() &&
6404 !Subtarget.hasVInstructionsF16())) {
6405 if (Op1.getValueType() == MVT::nxv32f16)
6406 return SplitVectorOp(Op, DAG);
6407 // f16 -> f32
6408 SDLoc DL(Op);
6409 MVT NVT = MVT::getVectorVT(MVT::f32,
6410 Op1.getValueType().getVectorElementCount());
6411 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6412 // f32 -> int
6413 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6414 }
6415 [[fallthrough]];
6416 case ISD::STRICT_FP_TO_SINT:
6417 case ISD::STRICT_FP_TO_UINT:
6418 case ISD::STRICT_SINT_TO_FP:
6419 case ISD::STRICT_UINT_TO_FP: {
6420 // RVV can only do fp<->int conversions to types half/double the size as
6421 // the source. We custom-lower any conversions that do two hops into
6422 // sequences.
6423 MVT VT = Op.getSimpleValueType();
6424 if (!VT.isVector())
6425 return Op;
6426 SDLoc DL(Op);
6427 bool IsStrict = Op->isStrictFPOpcode();
6428 SDValue Src = Op.getOperand(0 + IsStrict);
6429 MVT EltVT = VT.getVectorElementType();
6430 MVT SrcVT = Src.getSimpleValueType();
6431 MVT SrcEltVT = SrcVT.getVectorElementType();
6432 unsigned EltSize = EltVT.getSizeInBits();
6433 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6434 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6435 "Unexpected vector element types");
6436
6437 bool IsInt2FP = SrcEltVT.isInteger();
6438 // Widening conversions
6439 if (EltSize > (2 * SrcEltSize)) {
6440 if (IsInt2FP) {
6441 // Do a regular integer sign/zero extension then convert to float.
6442 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6443 VT.getVectorElementCount());
6444 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6445 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6446 ? ISD::ZERO_EXTEND
6447 : ISD::SIGN_EXTEND;
6448 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6449 if (IsStrict)
6450 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6451 Op.getOperand(0), Ext);
6452 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6453 }
6454 // FP2Int
6455 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6456 // Do one doubling fp_extend then complete the operation by converting
6457 // to int.
6458 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6459 if (IsStrict) {
6460 auto [FExt, Chain] =
6461 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6462 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6463 }
6464 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6465 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6466 }
6467
6468 // Narrowing conversions
6469 if (SrcEltSize > (2 * EltSize)) {
6470 if (IsInt2FP) {
6471 // One narrowing int_to_fp, then an fp_round.
6472 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6473 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6474 if (IsStrict) {
6475 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6476 DAG.getVTList(InterimFVT, MVT::Other),
6477 Op.getOperand(0), Src);
6478 SDValue Chain = Int2FP.getValue(1);
6479 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6480 }
6481 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6482 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6483 }
6484 // FP2Int
6485 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6486 // representable by the integer, the result is poison.
6487 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6488 VT.getVectorElementCount());
6489 if (IsStrict) {
6490 SDValue FP2Int =
6491 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6492 Op.getOperand(0), Src);
6493 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6494 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6495 }
6496 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6497 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6498 }
6499
6500 // Scalable vectors can exit here. Patterns will handle equally-sized
6501 // conversions halving/doubling ones.
6502 if (!VT.isFixedLengthVector())
6503 return Op;
6504
6505 // For fixed-length vectors we lower to a custom "VL" node.
6506 unsigned RVVOpc = 0;
6507 switch (Op.getOpcode()) {
6508 default:
6509 llvm_unreachable("Impossible opcode");
6510 case ISD::FP_TO_SINT:
6511 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6512 break;
6513 case ISD::FP_TO_UINT:
6514 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6515 break;
6516 case ISD::SINT_TO_FP:
6517 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6518 break;
6519 case ISD::UINT_TO_FP:
6520 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6521 break;
6522 case ISD::STRICT_FP_TO_SINT:
6523 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6524 break;
6525 case ISD::STRICT_FP_TO_UINT:
6526 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6527 break;
6528 case ISD::STRICT_SINT_TO_FP:
6529 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6530 break;
6531 case ISD::STRICT_UINT_TO_FP:
6532 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6533 break;
6534 }
6535
6536 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6537 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6538 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6539 "Expected same element count");
6540
6541 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6542
6543 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6544 if (IsStrict) {
6545 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6546 Op.getOperand(0), Src, Mask, VL);
6547 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6548 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6549 }
6550 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6551 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6552 }
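// Two-hop example for the widening path above: FP_TO_SINT from nxv2f16 to
// nxv2i64 has EltSize 64 > 2 * 16, so the source is first fp_extended to
// nxv2f32 and the remaining f32 -> i64 step is a single doubling conversion
// handled by existing patterns. Narrowing is symmetric, e.g. SINT_TO_FP from
// nxv2i64 to nxv2f16 converts to nxv2f32 and then fp_rounds to f16.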
6553 case ISD::FP_TO_SINT_SAT:
6554 case ISD::FP_TO_UINT_SAT:
6555 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6556 case ISD::FP_TO_BF16: {
6557 // Custom lower to ensure the libcall return is passed in an FPR on hard
6558 // float ABIs.
6559 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6560 SDLoc DL(Op);
6561 MakeLibCallOptions CallOptions;
6562 RTLIB::Libcall LC =
6563 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6564 SDValue Res =
6565 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6566 if (Subtarget.is64Bit())
6567 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6568 return DAG.getBitcast(MVT::i32, Res);
6569 }
6570 case ISD::BF16_TO_FP: {
6571 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6572 MVT VT = Op.getSimpleValueType();
6573 SDLoc DL(Op);
6574 Op = DAG.getNode(
6575 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6576 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6577 SDValue Res = Subtarget.is64Bit()
6578 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6579 : DAG.getBitcast(MVT::f32, Op);
6580 // fp_extend if the target VT is bigger than f32.
6581 if (VT != MVT::f32)
6582 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6583 return Res;
6584 }
6585 case ISD::FP_TO_FP16: {
6586 // Custom lower to ensure the libcall return is passed in an FPR on hard
6587 // float ABIs.
6588 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6589 SDLoc DL(Op);
6590 MakeLibCallOptions CallOptions;
6591 RTLIB::Libcall LC =
6592 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6593 SDValue Res =
6594 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6595 if (Subtarget.is64Bit())
6596 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6597 return DAG.getBitcast(MVT::i32, Res);
6598 }
6599 case ISD::FP16_TO_FP: {
6600 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6601 // float ABIs.
6602 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6603 SDLoc DL(Op);
6604 MakeLibCallOptions CallOptions;
6605 SDValue Arg = Subtarget.is64Bit()
6606 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6607 Op.getOperand(0))
6608 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6609 SDValue Res =
6610 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6611 .first;
6612 return Res;
6613 }
6614 case ISD::FTRUNC:
6615 case ISD::FCEIL:
6616 case ISD::FFLOOR:
6617 case ISD::FNEARBYINT:
6618 case ISD::FRINT:
6619 case ISD::FROUND:
6620 case ISD::FROUNDEVEN:
6621 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6622 case ISD::LRINT:
6623 case ISD::LLRINT:
6624 return lowerVectorXRINT(Op, DAG, Subtarget);
6625 case ISD::VECREDUCE_ADD:
6626 case ISD::VECREDUCE_UMAX:
6627 case ISD::VECREDUCE_SMAX:
6628 case ISD::VECREDUCE_UMIN:
6629 case ISD::VECREDUCE_SMIN:
6630 return lowerVECREDUCE(Op, DAG);
6631 case ISD::VECREDUCE_AND:
6632 case ISD::VECREDUCE_OR:
6633 case ISD::VECREDUCE_XOR:
6634 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6635 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6636 return lowerVECREDUCE(Op, DAG);
6637 case ISD::VECREDUCE_FADD:
6638 case ISD::VECREDUCE_SEQ_FADD:
6639 case ISD::VECREDUCE_FMIN:
6640 case ISD::VECREDUCE_FMAX:
6641 case ISD::VECREDUCE_FMAXIMUM:
6642 case ISD::VECREDUCE_FMINIMUM:
6643 return lowerFPVECREDUCE(Op, DAG);
6644 case ISD::VP_REDUCE_ADD:
6645 case ISD::VP_REDUCE_UMAX:
6646 case ISD::VP_REDUCE_SMAX:
6647 case ISD::VP_REDUCE_UMIN:
6648 case ISD::VP_REDUCE_SMIN:
6649 case ISD::VP_REDUCE_FADD:
6650 case ISD::VP_REDUCE_SEQ_FADD:
6651 case ISD::VP_REDUCE_FMIN:
6652 case ISD::VP_REDUCE_FMAX:
6653 case ISD::VP_REDUCE_FMINIMUM:
6654 case ISD::VP_REDUCE_FMAXIMUM:
6655 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6656 (Subtarget.hasVInstructionsF16Minimal() &&
6657 !Subtarget.hasVInstructionsF16()))
6658 return SplitVectorReductionOp(Op, DAG);
6659 return lowerVPREDUCE(Op, DAG);
6660 case ISD::VP_REDUCE_AND:
6661 case ISD::VP_REDUCE_OR:
6662 case ISD::VP_REDUCE_XOR:
6663 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6664 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6665 return lowerVPREDUCE(Op, DAG);
6666 case ISD::VP_CTTZ_ELTS:
6667 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6668 return lowerVPCttzElements(Op, DAG);
6669 case ISD::UNDEF: {
6670 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6671 return convertFromScalableVector(Op.getSimpleValueType(),
6672 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6673 }
6674 case ISD::INSERT_SUBVECTOR:
6675 return lowerINSERT_SUBVECTOR(Op, DAG);
6676 case ISD::EXTRACT_SUBVECTOR:
6677 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6678 case ISD::VECTOR_DEINTERLEAVE:
6679 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6680 case ISD::VECTOR_INTERLEAVE:
6681 return lowerVECTOR_INTERLEAVE(Op, DAG);
6682 case ISD::STEP_VECTOR:
6683 return lowerSTEP_VECTOR(Op, DAG);
6684 case ISD::VECTOR_REVERSE:
6685 return lowerVECTOR_REVERSE(Op, DAG);
6686 case ISD::VECTOR_SPLICE:
6687 return lowerVECTOR_SPLICE(Op, DAG);
6688 case ISD::BUILD_VECTOR:
6689 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6690 case ISD::SPLAT_VECTOR:
6691 if ((Op.getValueType().getScalarType() == MVT::f16 &&
6692 (Subtarget.hasVInstructionsF16Minimal() &&
6693 Subtarget.hasStdExtZfhminOrZhinxmin() &&
6694 !Subtarget.hasVInstructionsF16())) ||
6695 (Op.getValueType().getScalarType() == MVT::bf16 &&
6696 (Subtarget.hasVInstructionsBF16Minimal() &&
6697 Subtarget.hasStdExtZfbfmin()))) {
6698 if (Op.getValueType() == MVT::nxv32f16 ||
6699 Op.getValueType() == MVT::nxv32bf16)
6700 return SplitVectorOp(Op, DAG);
6701 SDLoc DL(Op);
6702 SDValue NewScalar =
6703 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6704 SDValue NewSplat = DAG.getNode(
6705 ISD::SPLAT_VECTOR, DL,
6706 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6707 NewScalar);
6708 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6709 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6710 }
6711 if (Op.getValueType().getVectorElementType() == MVT::i1)
6712 return lowerVectorMaskSplat(Op, DAG);
6713 return SDValue();
6714 case ISD::VECTOR_SHUFFLE:
6715 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6716 case ISD::CONCAT_VECTORS: {
6717 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6718 // better than going through the stack, as the default expansion does.
6719 SDLoc DL(Op);
6720 MVT VT = Op.getSimpleValueType();
6721 MVT ContainerVT = VT;
6722 if (VT.isFixedLengthVector())
6723 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6724
6725 // Recursively split concat_vectors with more than 2 operands:
6726 //
6727 // concat_vector op1, op2, op3, op4
6728 // ->
6729 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6730 //
6731 // This reduces the length of the chain of vslideups and allows us to
6732 // perform the vslideups at a smaller LMUL, limited to MF2.
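// For example, an 8-operand concat is first split into two 4-operand concats
// of the half-width type, which are then joined by the 2-operand path below.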
6733 if (Op.getNumOperands() > 2 &&
6734 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6735 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6736 assert(isPowerOf2_32(Op.getNumOperands()));
6737 size_t HalfNumOps = Op.getNumOperands() / 2;
6738 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6739 Op->ops().take_front(HalfNumOps));
6740 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6741 Op->ops().drop_front(HalfNumOps));
6742 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6743 }
6744
6745 unsigned NumOpElts =
6746 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6747 SDValue Vec = DAG.getUNDEF(VT);
6748 for (const auto &OpIdx : enumerate(Op->ops())) {
6749 SDValue SubVec = OpIdx.value();
6750 // Don't insert undef subvectors.
6751 if (SubVec.isUndef())
6752 continue;
6753 Vec =
6754 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6755 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6756 }
6757 return Vec;
6758 }
6759 case ISD::LOAD:
6760 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6761 return V;
6762 if (Op.getValueType().isFixedLengthVector())
6763 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6764 return Op;
6765 case ISD::STORE:
6766 if (auto V = expandUnalignedRVVStore(Op, DAG))
6767 return V;
6768 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6769 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6770 return Op;
6771 case ISD::MLOAD:
6772 case ISD::VP_LOAD:
6773 return lowerMaskedLoad(Op, DAG);
6774 case ISD::MSTORE:
6775 case ISD::VP_STORE:
6776 return lowerMaskedStore(Op, DAG);
6777 case ISD::SELECT_CC: {
6778 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6779 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6780 // into separate SETCC+SELECT just like LegalizeDAG.
6781 SDValue Tmp1 = Op.getOperand(0);
6782 SDValue Tmp2 = Op.getOperand(1);
6783 SDValue True = Op.getOperand(2);
6784 SDValue False = Op.getOperand(3);
6785 EVT VT = Op.getValueType();
6786 SDValue CC = Op.getOperand(4);
6787 EVT CmpVT = Tmp1.getValueType();
6788 EVT CCVT =
6789 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6790 SDLoc DL(Op);
6791 SDValue Cond =
6792 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6793 return DAG.getSelect(DL, VT, Cond, True, False);
6794 }
6795 case ISD::SETCC: {
6796 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6797 if (OpVT.isScalarInteger()) {
6798 MVT VT = Op.getSimpleValueType();
6799 SDValue LHS = Op.getOperand(0);
6800 SDValue RHS = Op.getOperand(1);
6801 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6802 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6803 "Unexpected CondCode");
6804
6805 SDLoc DL(Op);
6806
6807 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6808 // convert this to the equivalent of (set(u)ge X, C+1) by using
6809 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6810 // in a register.
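// For example, (setugt X, 7) becomes (xori (sltiu X, 8), 1), so the constant
// stays an immediate of the compare instead of being materialized for an sltu.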
6811 if (isa<ConstantSDNode>(RHS)) {
6812 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6813 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6814 // If this is an unsigned compare and the constant is -1, incrementing
6815 // the constant would change behavior. The result should be false.
6816 if (CCVal == ISD::SETUGT && Imm == -1)
6817 return DAG.getConstant(0, DL, VT);
6818 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6819 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6820 SDValue SetCC = DAG.getSetCC(
6821 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
6822 return DAG.getLogicalNOT(DL, SetCC, VT);
6823 }
6824 }
6825
6826 // Not a constant we could handle, swap the operands and condition code to
6827 // SETLT/SETULT.
6828 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6829 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6830 }
6831
6832 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6833 (Subtarget.hasVInstructionsF16Minimal() &&
6834 !Subtarget.hasVInstructionsF16()))
6835 return SplitVectorOp(Op, DAG);
6836
6837 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6838 }
6839 case ISD::ADD:
6840 case ISD::SUB:
6841 case ISD::MUL:
6842 case ISD::MULHS:
6843 case ISD::MULHU:
6844 case ISD::AND:
6845 case ISD::OR:
6846 case ISD::XOR:
6847 case ISD::SDIV:
6848 case ISD::SREM:
6849 case ISD::UDIV:
6850 case ISD::UREM:
6851 case ISD::BSWAP:
6852 case ISD::CTPOP:
6853 return lowerToScalableOp(Op, DAG);
6854 case ISD::SHL:
6855 case ISD::SRA:
6856 case ISD::SRL:
6857 if (Op.getSimpleValueType().isFixedLengthVector())
6858 return lowerToScalableOp(Op, DAG);
6859 // This can be called for an i32 shift amount that needs to be promoted.
6860 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6861 "Unexpected custom legalisation");
6862 return SDValue();
6863 case ISD::FADD:
6864 case ISD::FSUB:
6865 case ISD::FMUL:
6866 case ISD::FDIV:
6867 case ISD::FNEG:
6868 case ISD::FABS:
6869 case ISD::FSQRT:
6870 case ISD::FMA:
6871 case ISD::FMINNUM:
6872 case ISD::FMAXNUM:
6873 if (Op.getValueType() == MVT::nxv32f16 &&
6874 (Subtarget.hasVInstructionsF16Minimal() &&
6875 !Subtarget.hasVInstructionsF16()))
6876 return SplitVectorOp(Op, DAG);
6877 [[fallthrough]];
6878 case ISD::AVGFLOORS:
6879 case ISD::AVGFLOORU:
6880 case ISD::AVGCEILS:
6881 case ISD::AVGCEILU:
6882 case ISD::SMIN:
6883 case ISD::SMAX:
6884 case ISD::UMIN:
6885 case ISD::UMAX:
6886 return lowerToScalableOp(Op, DAG);
6887 case ISD::UADDSAT:
6888 case ISD::USUBSAT:
6889 return lowerToScalableOp(Op, DAG);
6890 case ISD::SADDSAT:
6891 case ISD::SSUBSAT:
6892 return lowerToScalableOp(Op, DAG);
6893 case ISD::ABDS:
6894 case ISD::ABDU: {
6895 SDLoc dl(Op);
6896 EVT VT = Op->getValueType(0);
6897 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6898 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6899 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6900
6901 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6902 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
6903 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6904 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6905 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6906 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6907 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6908 }
6909 case ISD::ABS:
6910 case ISD::VP_ABS:
6911 return lowerABS(Op, DAG);
6912 case ISD::CTLZ:
6913 case ISD::CTLZ_ZERO_UNDEF:
6914 case ISD::CTTZ:
6915 case ISD::CTTZ_ZERO_UNDEF:
6916 if (Subtarget.hasStdExtZvbb())
6917 return lowerToScalableOp(Op, DAG);
6918 assert(Op.getOpcode() != ISD::CTTZ);
6919 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6920 case ISD::VSELECT:
6921 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6922 case ISD::FCOPYSIGN:
6923 if (Op.getValueType() == MVT::nxv32f16 &&
6924 (Subtarget.hasVInstructionsF16Minimal() &&
6925 !Subtarget.hasVInstructionsF16()))
6926 return SplitVectorOp(Op, DAG);
6927 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6928 case ISD::STRICT_FADD:
6929 case ISD::STRICT_FSUB:
6930 case ISD::STRICT_FMUL:
6931 case ISD::STRICT_FDIV:
6932 case ISD::STRICT_FSQRT:
6933 case ISD::STRICT_FMA:
6934 if (Op.getValueType() == MVT::nxv32f16 &&
6935 (Subtarget.hasVInstructionsF16Minimal() &&
6936 !Subtarget.hasVInstructionsF16()))
6937 return SplitStrictFPVectorOp(Op, DAG);
6938 return lowerToScalableOp(Op, DAG);
6939 case ISD::STRICT_FSETCC:
6940 case ISD::STRICT_FSETCCS:
6941 return lowerVectorStrictFSetcc(Op, DAG);
6942 case ISD::STRICT_FCEIL:
6943 case ISD::STRICT_FRINT:
6944 case ISD::STRICT_FFLOOR:
6945 case ISD::STRICT_FTRUNC:
6946 case ISD::STRICT_FNEARBYINT:
6947 case ISD::STRICT_FROUND:
6948 case ISD::STRICT_FROUNDEVEN:
6949 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6950 case ISD::MGATHER:
6951 case ISD::VP_GATHER:
6952 return lowerMaskedGather(Op, DAG);
6953 case ISD::MSCATTER:
6954 case ISD::VP_SCATTER:
6955 return lowerMaskedScatter(Op, DAG);
6956 case ISD::GET_ROUNDING:
6957 return lowerGET_ROUNDING(Op, DAG);
6958 case ISD::SET_ROUNDING:
6959 return lowerSET_ROUNDING(Op, DAG);
6960 case ISD::EH_DWARF_CFA:
6961 return lowerEH_DWARF_CFA(Op, DAG);
6962 case ISD::VP_SELECT:
6963 case ISD::VP_MERGE:
6964 case ISD::VP_ADD:
6965 case ISD::VP_SUB:
6966 case ISD::VP_MUL:
6967 case ISD::VP_SDIV:
6968 case ISD::VP_UDIV:
6969 case ISD::VP_SREM:
6970 case ISD::VP_UREM:
6971 case ISD::VP_UADDSAT:
6972 case ISD::VP_USUBSAT:
6973 case ISD::VP_SADDSAT:
6974 case ISD::VP_SSUBSAT:
6975 case ISD::VP_LRINT:
6976 case ISD::VP_LLRINT:
6977 return lowerVPOp(Op, DAG);
6978 case ISD::VP_AND:
6979 case ISD::VP_OR:
6980 case ISD::VP_XOR:
6981 return lowerLogicVPOp(Op, DAG);
6982 case ISD::VP_FADD:
6983 case ISD::VP_FSUB:
6984 case ISD::VP_FMUL:
6985 case ISD::VP_FDIV:
6986 case ISD::VP_FNEG:
6987 case ISD::VP_FABS:
6988 case ISD::VP_SQRT:
6989 case ISD::VP_FMA:
6990 case ISD::VP_FMINNUM:
6991 case ISD::VP_FMAXNUM:
6992 case ISD::VP_FCOPYSIGN:
6993 if (Op.getValueType() == MVT::nxv32f16 &&
6994 (Subtarget.hasVInstructionsF16Minimal() &&
6995 !Subtarget.hasVInstructionsF16()))
6996 return SplitVPOp(Op, DAG);
6997 [[fallthrough]];
6998 case ISD::VP_SRA:
6999 case ISD::VP_SRL:
7000 case ISD::VP_SHL:
7001 return lowerVPOp(Op, DAG);
7002 case ISD::VP_IS_FPCLASS:
7003 return LowerIS_FPCLASS(Op, DAG);
7004 case ISD::VP_SIGN_EXTEND:
7005 case ISD::VP_ZERO_EXTEND:
7006 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7007 return lowerVPExtMaskOp(Op, DAG);
7008 return lowerVPOp(Op, DAG);
7009 case ISD::VP_TRUNCATE:
7010 return lowerVectorTruncLike(Op, DAG);
7011 case ISD::VP_FP_EXTEND:
7012 case ISD::VP_FP_ROUND:
7013 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7014 case ISD::VP_SINT_TO_FP:
7015 case ISD::VP_UINT_TO_FP:
7016 if (Op.getValueType().isVector() &&
7017 Op.getValueType().getScalarType() == MVT::f16 &&
7018 (Subtarget.hasVInstructionsF16Minimal() &&
7019 !Subtarget.hasVInstructionsF16())) {
7020 if (Op.getValueType() == MVT::nxv32f16)
7021 return SplitVPOp(Op, DAG);
7022 // int -> f32
7023 SDLoc DL(Op);
7024 MVT NVT =
7025 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7026 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7027 // f32 -> f16
7028 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7029 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7030 }
7031 [[fallthrough]];
7032 case ISD::VP_FP_TO_SINT:
7033 case ISD::VP_FP_TO_UINT:
7034 if (SDValue Op1 = Op.getOperand(0);
7035 Op1.getValueType().isVector() &&
7036 Op1.getValueType().getScalarType() == MVT::f16 &&
7037 (Subtarget.hasVInstructionsF16Minimal() &&
7038 !Subtarget.hasVInstructionsF16())) {
7039 if (Op1.getValueType() == MVT::nxv32f16)
7040 return SplitVPOp(Op, DAG);
7041 // f16 -> f32
7042 SDLoc DL(Op);
7043 MVT NVT = MVT::getVectorVT(MVT::f32,
7044 Op1.getValueType().getVectorElementCount());
7045 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7046 // f32 -> int
7047 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7048 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7049 }
7050 return lowerVPFPIntConvOp(Op, DAG);
7051 case ISD::VP_SETCC:
7052 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7053 (Subtarget.hasVInstructionsF16Minimal() &&
7054 !Subtarget.hasVInstructionsF16()))
7055 return SplitVPOp(Op, DAG);
7056 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7057 return lowerVPSetCCMaskOp(Op, DAG);
7058 [[fallthrough]];
7059 case ISD::VP_SMIN:
7060 case ISD::VP_SMAX:
7061 case ISD::VP_UMIN:
7062 case ISD::VP_UMAX:
7063 case ISD::VP_BITREVERSE:
7064 case ISD::VP_BSWAP:
7065 return lowerVPOp(Op, DAG);
7066 case ISD::VP_CTLZ:
7067 case ISD::VP_CTLZ_ZERO_UNDEF:
7068 if (Subtarget.hasStdExtZvbb())
7069 return lowerVPOp(Op, DAG);
7070 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7071 case ISD::VP_CTTZ:
7072 case ISD::VP_CTTZ_ZERO_UNDEF:
7073 if (Subtarget.hasStdExtZvbb())
7074 return lowerVPOp(Op, DAG);
7075 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7076 case ISD::VP_CTPOP:
7077 return lowerVPOp(Op, DAG);
7078 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7079 return lowerVPStridedLoad(Op, DAG);
7080 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7081 return lowerVPStridedStore(Op, DAG);
7082 case ISD::VP_FCEIL:
7083 case ISD::VP_FFLOOR:
7084 case ISD::VP_FRINT:
7085 case ISD::VP_FNEARBYINT:
7086 case ISD::VP_FROUND:
7087 case ISD::VP_FROUNDEVEN:
7088 case ISD::VP_FROUNDTOZERO:
7089 if (Op.getValueType() == MVT::nxv32f16 &&
7090 (Subtarget.hasVInstructionsF16Minimal() &&
7091 !Subtarget.hasVInstructionsF16()))
7092 return SplitVPOp(Op, DAG);
7093 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7094 case ISD::VP_FMAXIMUM:
7095 case ISD::VP_FMINIMUM:
7096 if (Op.getValueType() == MVT::nxv32f16 &&
7097 (Subtarget.hasVInstructionsF16Minimal() &&
7098 !Subtarget.hasVInstructionsF16()))
7099 return SplitVPOp(Op, DAG);
7100 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7101 case ISD::EXPERIMENTAL_VP_SPLICE:
7102 return lowerVPSpliceExperimental(Op, DAG);
7103 case ISD::EXPERIMENTAL_VP_REVERSE:
7104 return lowerVPReverseExperimental(Op, DAG);
7105 case ISD::EXPERIMENTAL_VP_SPLAT:
7106 return lowerVPSplatExperimental(Op, DAG);
7107 case ISD::CLEAR_CACHE: {
7108 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7109 "llvm.clear_cache only needs custom lower on Linux targets");
7110 SDLoc DL(Op);
7111 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7112 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7113 Op.getOperand(2), Flags, DL);
7114 }
7115 }
7116}
7117
7118SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7119 SDValue Start, SDValue End,
7120 SDValue Flags, SDLoc DL) const {
7121 MakeLibCallOptions CallOptions;
7122 std::pair<SDValue, SDValue> CallResult =
7123 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7124 {Start, End, Flags}, CallOptions, DL, InChain);
7125
7126 // This function returns void so only the out chain matters.
7127 return CallResult.second;
7128}
7129
7130static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7131 SelectionDAG &DAG, unsigned Flags) {
7132 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7133}
7134
7135static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7136 SelectionDAG &DAG, unsigned Flags) {
7137 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7138 Flags);
7139}
7140
7141static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7142 SelectionDAG &DAG, unsigned Flags) {
7143 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7144 N->getOffset(), Flags);
7145}
7146
7147static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7148 SelectionDAG &DAG, unsigned Flags) {
7149 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7150}
7151
7152template <class NodeTy>
7153SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7154 bool IsLocal, bool IsExternWeak) const {
7155 SDLoc DL(N);
7156 EVT Ty = getPointerTy(DAG.getDataLayout());
7157
7158 // When HWASAN is used and tagging of global variables is enabled
7159 // they should be accessed via the GOT, since the tagged address of a global
7160 // is incompatible with existing code models. This also applies to non-pic
7161 // mode.
7162 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7163 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7164 if (IsLocal && !Subtarget.allowTaggedGlobals())
7165 // Use PC-relative addressing to access the symbol. This generates the
7166 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7167 // %pcrel_lo(auipc)).
7168 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7169
7170 // Use PC-relative addressing to access the GOT for this symbol, then load
7171 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7172 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7173 SDValue Load =
7174 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7175 MachineFunction &MF = DAG.getMachineFunction();
7176 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7177 MachinePointerInfo::getGOT(MF),
7178 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7179 MachineMemOperand::MOInvariant,
7180 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7181 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7182 return Load;
7183 }
7184
7185 switch (getTargetMachine().getCodeModel()) {
7186 default:
7187 report_fatal_error("Unsupported code model for lowering");
7188 case CodeModel::Small: {
7189 // Generate a sequence for accessing addresses within the first 2 GiB of
7190 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7191 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7192 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7193 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7194 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7195 }
7196 case CodeModel::Medium: {
7197 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7198 if (IsExternWeak) {
7199 // An extern weak symbol may be undefined, i.e. have value 0, which may
7200 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7201 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7202 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7203 SDValue Load =
7204 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7205 MachineFunction &MF = DAG.getMachineFunction();
7206 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7207 MachinePointerInfo::getGOT(MF),
7208 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7209 MachineMemOperand::MOInvariant,
7210 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7211 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7212 return Load;
7213 }
7214
7215 // Generate a sequence for accessing addresses within any 2GiB range within
7216 // the address space. This generates the pattern (PseudoLLA sym), which
7217 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7218 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7219 }
7220 }
7221}
7222
7223SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7224 SelectionDAG &DAG) const {
7225 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7226 assert(N->getOffset() == 0 && "unexpected offset in global node");
7227 const GlobalValue *GV = N->getGlobal();
7228 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7229}
7230
7231SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7232 SelectionDAG &DAG) const {
7233 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7234
7235 return getAddr(N, DAG);
7236}
7237
7238SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7239 SelectionDAG &DAG) const {
7240 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7241
7242 return getAddr(N, DAG);
7243}
7244
7245SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7246 SelectionDAG &DAG) const {
7247 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7248
7249 return getAddr(N, DAG);
7250}
7251
7252SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7253 SelectionDAG &DAG,
7254 bool UseGOT) const {
7255 SDLoc DL(N);
7256 EVT Ty = getPointerTy(DAG.getDataLayout());
7257 const GlobalValue *GV = N->getGlobal();
7258 MVT XLenVT = Subtarget.getXLenVT();
7259
7260 if (UseGOT) {
7261 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7262 // load the address from the GOT and add the thread pointer. This generates
7263 // the pattern (PseudoLA_TLS_IE sym), which expands to
7264 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7265 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7266 SDValue Load =
7267 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7268 MachineFunction &MF = DAG.getMachineFunction();
7269 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7270 MachinePointerInfo::getGOT(MF),
7271 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7272 MachineMemOperand::MOInvariant,
7273 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7274 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7275
7276 // Add the thread pointer.
7277 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7278 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7279 }
7280
7281 // Generate a sequence for accessing the address relative to the thread
7282 // pointer, with the appropriate adjustment for the thread pointer offset.
7283 // This generates the pattern
7284 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7285 SDValue AddrHi =
7286 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7287 SDValue AddrAdd =
7288 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7289 SDValue AddrLo =
7290 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7291
7292 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7293 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7294 SDValue MNAdd =
7295 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7296 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7297}
7298
7299SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7300 SelectionDAG &DAG) const {
7301 SDLoc DL(N);
7302 EVT Ty = getPointerTy(DAG.getDataLayout());
7303 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7304 const GlobalValue *GV = N->getGlobal();
7305
7306 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7307 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7308 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7309 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7310 SDValue Load =
7311 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7312
7313 // Prepare argument list to generate call.
7314 ArgListTy Args;
7315 ArgListEntry Entry;
7316 Entry.Node = Load;
7317 Entry.Ty = CallTy;
7318 Args.push_back(Entry);
7319
7320 // Setup call to __tls_get_addr.
7321 TargetLowering::CallLoweringInfo CLI(DAG);
7322 CLI.setDebugLoc(DL)
7323 .setChain(DAG.getEntryNode())
7324 .setLibCallee(CallingConv::C, CallTy,
7325 DAG.getExternalSymbol("__tls_get_addr", Ty),
7326 std::move(Args));
7327
7328 return LowerCallTo(CLI).first;
7329}
7330
7331SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7332 SelectionDAG &DAG) const {
7333 SDLoc DL(N);
7334 EVT Ty = getPointerTy(DAG.getDataLayout());
7335 const GlobalValue *GV = N->getGlobal();
7336
7337 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7338 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7339 //
7340 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7341 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7342 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7343 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7344 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7345 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7346}
7347
7348SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7349 SelectionDAG &DAG) const {
7350 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7351 assert(N->getOffset() == 0 && "unexpected offset in global node");
7352
7353 if (DAG.getTarget().useEmulatedTLS())
7354 return LowerToTLSEmulatedModel(N, DAG);
7355
7356 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7357
7358 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7359 CallingConv::GHC)
7360 report_fatal_error("In GHC calling convention TLS is not supported");
7361
7362 SDValue Addr;
7363 switch (Model) {
7364 case TLSModel::LocalExec:
7365 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7366 break;
7367 case TLSModel::InitialExec:
7368 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7369 break;
7370 case TLSModel::LocalDynamic:
7371 case TLSModel::GeneralDynamic:
7372 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7373 : getDynamicTLSAddr(N, DAG);
7374 break;
7375 }
7376
7377 return Addr;
7378}
7379
7380// Return true if Val is equal to (setcc LHS, RHS, CC).
7381// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7382// Otherwise, return std::nullopt.
7383static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7384 ISD::CondCode CC, SDValue Val) {
7385 assert(Val->getOpcode() == ISD::SETCC);
7386 SDValue LHS2 = Val.getOperand(0);
7387 SDValue RHS2 = Val.getOperand(1);
7388 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7389
7390 if (LHS == LHS2 && RHS == RHS2) {
7391 if (CC == CC2)
7392 return true;
7393 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7394 return false;
7395 } else if (LHS == RHS2 && RHS == LHS2) {
7396 CC2 = ISD::getSetCCSwappedOperands(CC2);
7397 if (CC == CC2)
7398 return true;
7399 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7400 return false;
7401 }
7402
7403 return std::nullopt;
7404}
7405
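// Fold a scalar (select cond, x, y) into plain and/or/xor arithmetic on the
// condition: when one arm is 0 or -1 (and conditional-move fusion is not
// available), when the arms are bitwise complements of each other, or when the
// condition and both arms are setccs over the same operands.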
7406static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7407 const RISCVSubtarget &Subtarget) {
7408 SDValue CondV = N->getOperand(0);
7409 SDValue TrueV = N->getOperand(1);
7410 SDValue FalseV = N->getOperand(2);
7411 MVT VT = N->getSimpleValueType(0);
7412 SDLoc DL(N);
7413
7414 if (!Subtarget.hasConditionalMoveFusion()) {
7415 // (select c, -1, y) -> -c | y
7416 if (isAllOnesConstant(TrueV)) {
7417 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7418 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7419 }
7420 // (select c, y, -1) -> (c-1) | y
7421 if (isAllOnesConstant(FalseV)) {
7422 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7423 DAG.getAllOnesConstant(DL, VT));
7424 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7425 }
7426
7427 // (select c, 0, y) -> (c-1) & y
7428 if (isNullConstant(TrueV)) {
7429 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7430 DAG.getAllOnesConstant(DL, VT));
7431 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7432 }
7433 // (select c, y, 0) -> -c & y
7434 if (isNullConstant(FalseV)) {
7435 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7436 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7437 }
7438 }
7439
7440 // select c, ~x, x --> xor -c, x
7441 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7442 const APInt &TrueVal = TrueV->getAsAPIntVal();
7443 const APInt &FalseVal = FalseV->getAsAPIntVal();
7444 if (~TrueVal == FalseVal) {
7445 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7446 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7447 }
7448 }
7449
7450 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7451 // when both truev and falsev are also setcc.
7452 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7453 FalseV.getOpcode() == ISD::SETCC) {
7454 SDValue LHS = CondV.getOperand(0);
7455 SDValue RHS = CondV.getOperand(1);
7456 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7457
7458 // (select x, x, y) -> x | y
7459 // (select !x, x, y) -> x & y
7460 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7461 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7462 DAG.getFreeze(FalseV));
7463 }
7464 // (select x, y, x) -> x & y
7465 // (select !x, y, x) -> x | y
7466 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7467 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7468 DAG.getFreeze(TrueV), FalseV);
7469 }
7470 }
7471
7472 return SDValue();
7473}
7474
7475// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7476// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7477// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7478// being `0` or `-1`. In such cases we can replace `select` with `and`.
7479// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7480// than `c0`?
7481static SDValue
7482foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7483 const RISCVSubtarget &Subtarget) {
7484 if (Subtarget.hasShortForwardBranchOpt())
7485 return SDValue();
7486
7487 unsigned SelOpNo = 0;
7488 SDValue Sel = BO->getOperand(0);
7489 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7490 SelOpNo = 1;
7491 Sel = BO->getOperand(1);
7492 }
7493
7494 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7495 return SDValue();
7496
7497 unsigned ConstSelOpNo = 1;
7498 unsigned OtherSelOpNo = 2;
7499 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7500 ConstSelOpNo = 2;
7501 OtherSelOpNo = 1;
7502 }
7503 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7504 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7505 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7506 return SDValue();
7507
7508 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7509 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7510 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7511 return SDValue();
7512
7513 SDLoc DL(Sel);
7514 EVT VT = BO->getValueType(0);
7515
7516 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7517 if (SelOpNo == 1)
7518 std::swap(NewConstOps[0], NewConstOps[1]);
7519
7520 SDValue NewConstOp =
7521 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7522 if (!NewConstOp)
7523 return SDValue();
7524
7525 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7526 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7527 return SDValue();
7528
7529 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7530 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7531 if (SelOpNo == 1)
7532 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7533 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7534
7535 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7536 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7537 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7538}
7539
7540SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7541 SDValue CondV = Op.getOperand(0);
7542 SDValue TrueV = Op.getOperand(1);
7543 SDValue FalseV = Op.getOperand(2);
7544 SDLoc DL(Op);
7545 MVT VT = Op.getSimpleValueType();
7546 MVT XLenVT = Subtarget.getXLenVT();
7547
7548 // Lower vector SELECTs to VSELECTs by splatting the condition.
7549 if (VT.isVector()) {
7550 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7551 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7552 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7553 }
7554
7555 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7556 // nodes to implement the SELECT. Performing the lowering here allows for
7557 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7558 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7559 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7560 VT.isScalarInteger()) {
7561 // (select c, t, 0) -> (czero_eqz t, c)
7562 if (isNullConstant(FalseV))
7563 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7564 // (select c, 0, f) -> (czero_nez f, c)
7565 if (isNullConstant(TrueV))
7566 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7567
7568 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7569 if (TrueV.getOpcode() == ISD::AND &&
7570 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7571 return DAG.getNode(
7572 ISD::OR, DL, VT, TrueV,
7573 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7574 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7575 if (FalseV.getOpcode() == ISD::AND &&
7576 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7577 return DAG.getNode(
7578 ISD::OR, DL, VT, FalseV,
7579 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7580
7581 // Try some other optimizations before falling back to generic lowering.
7582 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7583 return V;
7584
7585 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7586 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
7587 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7588 const APInt &TrueVal = TrueV->getAsAPIntVal();
7589 const APInt &FalseVal = FalseV->getAsAPIntVal();
7590 const int TrueValCost = RISCVMatInt::getIntMatCost(
7591 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7592 const int FalseValCost = RISCVMatInt::getIntMatCost(
7593 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7594 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7595 SDValue LHSVal = DAG.getConstant(
7596 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7597 SDValue RHSVal =
7598 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7599 SDValue CMOV =
7600 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7601 DL, VT, LHSVal, CondV);
7602 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7603 }
7604
7605 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7606 // Unless we have the short forward branch optimization.
7607 if (!Subtarget.hasConditionalMoveFusion())
7608 return DAG.getNode(
7609 ISD::OR, DL, VT,
7610 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7611 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7612 }
7613
7614 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7615 return V;
7616
7617 if (Op.hasOneUse()) {
7618 unsigned UseOpc = Op->use_begin()->getOpcode();
7619 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7620 SDNode *BinOp = *Op->use_begin();
7621 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7622 DAG, Subtarget)) {
7623 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7624 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
7625 // may return a constant node and cause crash in lowerSELECT.
7626 if (NewSel.getOpcode() == ISD::SELECT)
7627 return lowerSELECT(NewSel, DAG);
7628 return NewSel;
7629 }
7630 }
7631 }
7632
7633 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7634 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7635 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7636 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7637 if (FPTV && FPFV) {
7638 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7639 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7640 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7641 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7642 DAG.getConstant(1, DL, XLenVT));
7643 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7644 }
7645 }
7646
7647 // If the condition is not an integer SETCC which operates on XLenVT, we need
7648 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7649 // (select condv, truev, falsev)
7650 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7651 if (CondV.getOpcode() != ISD::SETCC ||
7652 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7653 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7654 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7655
7656 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7657
7658 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7659 }
7660
7661 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7662 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7663 // advantage of the integer compare+branch instructions. i.e.:
7664 // (select (setcc lhs, rhs, cc), truev, falsev)
7665 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7666 SDValue LHS = CondV.getOperand(0);
7667 SDValue RHS = CondV.getOperand(1);
7668 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7669
7670 // Special case for a select of 2 constants that have a difference of 1.
7671 // Normally this is done by DAGCombine, but if the select is introduced by
7672 // type legalization or op legalization, we miss it. Restricting to SETLT
7673 // case for now because that is what signed saturating add/sub need.
7674 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7675 // but we would probably want to swap the true/false values if the condition
7676 // is SETGE/SETLE to avoid an XORI.
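// For example, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4), and
// (select (setlt a, b), 4, 5) becomes (sub 5, (setlt a, b)).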
7677 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7678 CCVal == ISD::SETLT) {
7679 const APInt &TrueVal = TrueV->getAsAPIntVal();
7680 const APInt &FalseVal = FalseV->getAsAPIntVal();
7681 if (TrueVal - 1 == FalseVal)
7682 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7683 if (TrueVal + 1 == FalseVal)
7684 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7685 }
7686
7687 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7688 // 1 < x ? x : 1 -> 0 < x ? x : 1
7689 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7690 RHS == TrueV && LHS == FalseV) {
7691 LHS = DAG.getConstant(0, DL, VT);
7692 // 0 <u x is the same as x != 0.
7693 if (CCVal == ISD::SETULT) {
7694 std::swap(LHS, RHS);
7695 CCVal = ISD::SETNE;
7696 }
7697 }
7698
7699 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7700 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7701 RHS == FalseV) {
7702 RHS = DAG.getConstant(0, DL, VT);
7703 }
7704
7705 SDValue TargetCC = DAG.getCondCode(CCVal);
7706
7707 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7708 // (select (setcc lhs, rhs, CC), constant, falsev)
7709 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7710 std::swap(TrueV, FalseV);
7711 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7712 }
7713
7714 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7715 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7716}
7717
7718SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7719 SDValue CondV = Op.getOperand(1);
7720 SDLoc DL(Op);
7721 MVT XLenVT = Subtarget.getXLenVT();
7722
7723 if (CondV.getOpcode() == ISD::SETCC &&
7724 CondV.getOperand(0).getValueType() == XLenVT) {
7725 SDValue LHS = CondV.getOperand(0);
7726 SDValue RHS = CondV.getOperand(1);
7727 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7728
7729 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7730
7731 SDValue TargetCC = DAG.getCondCode(CCVal);
7732 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7733 LHS, RHS, TargetCC, Op.getOperand(2));
7734 }
7735
7736 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7737 CondV, DAG.getConstant(0, DL, XLenVT),
7738 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7739}
7740
7741SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7742 MachineFunction &MF = DAG.getMachineFunction();
7743 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7744
7745 SDLoc DL(Op);
7746 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7747 getPointerTy(MF.getDataLayout()));
7748
7749 // vastart just stores the address of the VarArgsFrameIndex slot into the
7750 // memory location argument.
7751 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7752 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7753 MachinePointerInfo(SV));
7754}
7755
7756SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7757 SelectionDAG &DAG) const {
7758 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7759 MachineFunction &MF = DAG.getMachineFunction();
7760 MachineFrameInfo &MFI = MF.getFrameInfo();
7761 MFI.setFrameAddressIsTaken(true);
7762 Register FrameReg = RI.getFrameRegister(MF);
7763 int XLenInBytes = Subtarget.getXLen() / 8;
7764
7765 EVT VT = Op.getValueType();
7766 SDLoc DL(Op);
7767 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7768 unsigned Depth = Op.getConstantOperandVal(0);
7769 while (Depth--) {
7770 int Offset = -(XLenInBytes * 2);
7771 SDValue Ptr = DAG.getNode(
7772 ISD::ADD, DL, VT, FrameAddr,
7773 DAG.getSignedConstant(Offset, DL, getPointerTy(DAG.getDataLayout())));
7774 FrameAddr =
7775 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7776 }
7777 return FrameAddr;
7778}
7779
7780SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7781 SelectionDAG &DAG) const {
7782 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7783 MachineFunction &MF = DAG.getMachineFunction();
7784 MachineFrameInfo &MFI = MF.getFrameInfo();
7785 MFI.setReturnAddressIsTaken(true);
7786 MVT XLenVT = Subtarget.getXLenVT();
7787 int XLenInBytes = Subtarget.getXLen() / 8;
7788
7789 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7790 return SDValue();
7791
7792 EVT VT = Op.getValueType();
7793 SDLoc DL(Op);
7794 unsigned Depth = Op.getConstantOperandVal(0);
7795 if (Depth) {
7796 int Off = -XLenInBytes;
7797 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7798 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
7799 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7800 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7801 MachinePointerInfo());
7802 }
7803
7804 // Return the value of the return address register, marking it an implicit
7805 // live-in.
7806 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7807 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7808}
7809
7810SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7811 SelectionDAG &DAG) const {
7812 SDLoc DL(Op);
7813 SDValue Lo = Op.getOperand(0);
7814 SDValue Hi = Op.getOperand(1);
7815 SDValue Shamt = Op.getOperand(2);
7816 EVT VT = Lo.getValueType();
7817
7818 // if Shamt-XLEN < 0: // Shamt < XLEN
7819 // Lo = Lo << Shamt
7820 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7821 // else:
7822 // Lo = 0
7823 // Hi = Lo << (Shamt-XLEN)
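// Note that (Lo >>u 1) >>u (XLEN-1 - Shamt) is used rather than
// Lo >>u (XLEN - Shamt) so every shift amount stays in [0, XLEN-1] even when
// Shamt is 0.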
7824
7825 SDValue Zero = DAG.getConstant(0, DL, VT);
7826 SDValue One = DAG.getConstant(1, DL, VT);
7827 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
7828 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7829 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7830 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7831
7832 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7833 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7834 SDValue ShiftRightLo =
7835 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7836 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7837 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7838 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7839
7840 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7841
7842 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7843 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7844
7845 SDValue Parts[2] = {Lo, Hi};
7846 return DAG.getMergeValues(Parts, DL);
7847}
7848
7849SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7850 bool IsSRA) const {
7851 SDLoc DL(Op);
7852 SDValue Lo = Op.getOperand(0);
7853 SDValue Hi = Op.getOperand(1);
7854 SDValue Shamt = Op.getOperand(2);
7855 EVT VT = Lo.getValueType();
7856
7857 // SRA expansion:
7858 // if Shamt-XLEN < 0: // Shamt < XLEN
7859 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7860 // Hi = Hi >>s Shamt
7861 // else:
7862 // Lo = Hi >>s (Shamt-XLEN);
7863 // Hi = Hi >>s (XLEN-1)
7864 //
7865 // SRL expansion:
7866 // if Shamt-XLEN < 0: // Shamt < XLEN
7867 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7868 // Hi = Hi >>u Shamt
7869 // else:
7870 // Lo = Hi >>u (Shamt-XLEN);
7871 // Hi = 0;
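// As above, (Hi << 1) << (XLEN-1 - Shamt) keeps every shift amount within
// [0, XLEN-1] when Shamt is 0.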
7872
7873 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7874
7875 SDValue Zero = DAG.getConstant(0, DL, VT);
7876 SDValue One = DAG.getConstant(1, DL, VT);
7877 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
7878 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7879 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7880 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7881
7882 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7883 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7884 SDValue ShiftLeftHi =
7885 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7886 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7887 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7888 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7889 SDValue HiFalse =
7890 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7891
7892 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7893
7894 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7895 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7896
7897 SDValue Parts[2] = {Lo, Hi};
7898 return DAG.getMergeValues(Parts, DL);
7899}
7900
7901// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7902// legal equivalently-sized i8 type, so we can use that as a go-between.
7903SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7904 SelectionDAG &DAG) const {
7905 SDLoc DL(Op);
7906 MVT VT = Op.getSimpleValueType();
7907 SDValue SplatVal = Op.getOperand(0);
7908 // All-zeros or all-ones splats are handled specially.
7909 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7910 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7911 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7912 }
7913 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7914 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7915 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7916 }
7917 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7918 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7919 DAG.getConstant(1, DL, SplatVal.getValueType()));
7920 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7921 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7922 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7923}
7924
7925// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7926// illegal (currently only vXi64 RV32).
7927// FIXME: We could also catch non-constant sign-extended i32 values and lower
7928// them to VMV_V_X_VL.
7929SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7930 SelectionDAG &DAG) const {
7931 SDLoc DL(Op);
7932 MVT VecVT = Op.getSimpleValueType();
7933 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7934 "Unexpected SPLAT_VECTOR_PARTS lowering");
7935
7936 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7937 SDValue Lo = Op.getOperand(0);
7938 SDValue Hi = Op.getOperand(1);
7939
7940 MVT ContainerVT = VecVT;
7941 if (VecVT.isFixedLengthVector())
7942 ContainerVT = getContainerForFixedLengthVector(VecVT);
7943
7944 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7945
7946 SDValue Res =
7947 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7948
7949 if (VecVT.isFixedLengthVector())
7950 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7951
7952 return Res;
7953}
7954
7955// Custom-lower extensions from mask vectors by using a vselect either with 1
7956// for zero/any-extension or -1 for sign-extension:
7957// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7958// Note that any-extension is lowered identically to zero-extension.
7959SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7960 int64_t ExtTrueVal) const {
7961 SDLoc DL(Op);
7962 MVT VecVT = Op.getSimpleValueType();
7963 SDValue Src = Op.getOperand(0);
7964 // Only custom-lower extensions from mask types
7965 assert(Src.getValueType().isVector() &&
7966 Src.getValueType().getVectorElementType() == MVT::i1);
7967
7968 if (VecVT.isScalableVector()) {
7969 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7970 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
7971 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7972 }
7973
7974 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7975 MVT I1ContainerVT =
7976 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7977
7978 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7979
7980 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7981
7982 MVT XLenVT = Subtarget.getXLenVT();
7983 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7984 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
7985
7986 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7987 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7988 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7989 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7990 SDValue Select =
7991 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7992 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7993
7994 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7995}
7996
7997SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7998 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7999 MVT ExtVT = Op.getSimpleValueType();
8000 // Only custom-lower extensions from fixed-length vector types.
8001 if (!ExtVT.isFixedLengthVector())
8002 return Op;
8003 MVT VT = Op.getOperand(0).getSimpleValueType();
8004 // Grab the canonical container type for the extended type. Infer the smaller
8005 // type from that to ensure the same number of vector elements, as we know
8006 // the LMUL will be sufficient to hold the smaller type.
8007 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8008 // Get the extended container type manually to ensure the same number of
8009 // vector elements between source and dest.
8010 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8011 ContainerExtVT.getVectorElementCount());
8012
8013 SDValue Op1 =
8014 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8015
8016 SDLoc DL(Op);
8017 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8018
8019 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8020
8021 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8022}
8023
8024// Custom-lower truncations from vectors to mask vectors by using a mask and a
8025// setcc operation:
8026// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8027SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8028 SelectionDAG &DAG) const {
8029 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8030 SDLoc DL(Op);
8031 EVT MaskVT = Op.getValueType();
8032 // Only expect to custom-lower truncations to mask types
8033 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8034 "Unexpected type for vector mask lowering");
8035 SDValue Src = Op.getOperand(0);
8036 MVT VecVT = Src.getSimpleValueType();
8037 SDValue Mask, VL;
8038 if (IsVPTrunc) {
8039 Mask = Op.getOperand(1);
8040 VL = Op.getOperand(2);
8041 }
8042 // If this is a fixed vector, we need to convert it to a scalable vector.
8043 MVT ContainerVT = VecVT;
8044
8045 if (VecVT.isFixedLengthVector()) {
8046 ContainerVT = getContainerForFixedLengthVector(VecVT);
8047 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8048 if (IsVPTrunc) {
8049 MVT MaskContainerVT =
8050 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8051 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8052 }
8053 }
8054
8055 if (!IsVPTrunc) {
8056 std::tie(Mask, VL) =
8057 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8058 }
8059
8060 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8061 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8062
8063 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8064 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8065 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8066 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8067
8068 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8069 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8070 DAG.getUNDEF(ContainerVT), Mask, VL);
8071 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8072 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8073 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8074 if (MaskVT.isFixedLengthVector())
8075 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8076 return Trunc;
8077}
8078
8079SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8080 SelectionDAG &DAG) const {
8081 unsigned Opc = Op.getOpcode();
8082 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8083 SDLoc DL(Op);
8084
8085 MVT VT = Op.getSimpleValueType();
8086 // Only custom-lower vector truncates
8087 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8088
8089 // Truncates to mask types are handled differently
8090 if (VT.getVectorElementType() == MVT::i1)
8091 return lowerVectorMaskTruncLike(Op, DAG);
8092
8093 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8094 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8095 // truncate by one power of two at a time.
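// For example, a vXi64 -> vXi8 truncate is emitted as i64 -> i32 -> i16 -> i8,
// one narrowing node per step.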
8096 MVT DstEltVT = VT.getVectorElementType();
8097
8098 SDValue Src = Op.getOperand(0);
8099 MVT SrcVT = Src.getSimpleValueType();
8100 MVT SrcEltVT = SrcVT.getVectorElementType();
8101
8102 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8103 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8104 "Unexpected vector truncate lowering");
8105
8106 MVT ContainerVT = SrcVT;
8107 SDValue Mask, VL;
8108 if (IsVPTrunc) {
8109 Mask = Op.getOperand(1);
8110 VL = Op.getOperand(2);
8111 }
8112 if (SrcVT.isFixedLengthVector()) {
8113 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8114 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8115 if (IsVPTrunc) {
8116 MVT MaskVT = getMaskTypeFor(ContainerVT);
8117 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8118 }
8119 }
8120
8121 SDValue Result = Src;
8122 if (!IsVPTrunc) {
8123 std::tie(Mask, VL) =
8124 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8125 }
8126
8127 unsigned NewOpc;
8128 if (Opc == ISD::TRUNCATE_SSAT_S)
8129 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8130 else if (Opc == ISD::TRUNCATE_USAT_U)
8131 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8132 else
8133 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8134
8135 do {
8136 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8137 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8138 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8139 } while (SrcEltVT != DstEltVT);
8140
8141 if (SrcVT.isFixedLengthVector())
8142 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8143
8144 return Result;
8145}
8146
8147SDValue
8148RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8149 SelectionDAG &DAG) const {
8150 SDLoc DL(Op);
8151 SDValue Chain = Op.getOperand(0);
8152 SDValue Src = Op.getOperand(1);
8153 MVT VT = Op.getSimpleValueType();
8154 MVT SrcVT = Src.getSimpleValueType();
8155 MVT ContainerVT = VT;
8156 if (VT.isFixedLengthVector()) {
8157 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8158 ContainerVT =
8159 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8160 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8161 }
8162
8163 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8164
8165 // RVV can only widen/truncate fp to types double/half the size of the source.
8166 if ((VT.getVectorElementType() == MVT::f64 &&
8167 (SrcVT.getVectorElementType() == MVT::f16 ||
8168 SrcVT.getVectorElementType() == MVT::bf16)) ||
8169 ((VT.getVectorElementType() == MVT::f16 ||
8170 VT.getVectorElementType() == MVT::bf16) &&
8171 SrcVT.getVectorElementType() == MVT::f64)) {
8172 // To avoid double-rounding errors, the intermediate rounding uses round-to-odd.
8173 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8174 ? RISCVISD::STRICT_FP_EXTEND_VL
8175 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8176 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8177 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8178 Chain, Src, Mask, VL);
8179 Chain = Src.getValue(1);
8180 }
8181
8182 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8183 ? RISCVISD::STRICT_FP_EXTEND_VL
8184 : RISCVISD::STRICT_FP_ROUND_VL;
8185 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8186 Chain, Src, Mask, VL);
8187 if (VT.isFixedLengthVector()) {
8188 // StrictFP operations have two result values. Their lowered result should
8189 // have the same result count.
8190 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8191 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8192 }
8193 return Res;
8194}
8195
8196SDValue
8197RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8198 SelectionDAG &DAG) const {
8199 bool IsVP =
8200 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8201 bool IsExtend =
8202 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8203 // RVV can only truncate fp to types half the size of the source. We
8204 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8205 // conversion instruction.
8206 SDLoc DL(Op);
8207 MVT VT = Op.getSimpleValueType();
8208
8209 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8210
8211 SDValue Src = Op.getOperand(0);
8212 MVT SrcVT = Src.getSimpleValueType();
8213
8214 bool IsDirectExtend =
8215 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8216 (SrcVT.getVectorElementType() != MVT::f16 &&
8217 SrcVT.getVectorElementType() != MVT::bf16));
8218 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8219 VT.getVectorElementType() != MVT::bf16) ||
8220 SrcVT.getVectorElementType() != MVT::f64);
8221
8222 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8223
8224 // Prepare any fixed-length vector operands.
8225 MVT ContainerVT = VT;
8226 SDValue Mask, VL;
8227 if (IsVP) {
8228 Mask = Op.getOperand(1);
8229 VL = Op.getOperand(2);
8230 }
8231 if (VT.isFixedLengthVector()) {
8232 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8233 ContainerVT =
8234 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8235 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8236 if (IsVP) {
8237 MVT MaskVT = getMaskTypeFor(ContainerVT);
8238 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8239 }
8240 }
8241
8242 if (!IsVP)
8243 std::tie(Mask, VL) =
8244 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8245
8246 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8247
8248 if (IsDirectConv) {
8249 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8250 if (VT.isFixedLengthVector())
8251 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8252 return Src;
8253 }
8254
8255 unsigned InterConvOpc =
8256 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8257
8258 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8259 SDValue IntermediateConv =
8260 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8261 SDValue Result =
8262 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8263 if (VT.isFixedLengthVector())
8264 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8265 return Result;
8266}
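//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] The lowering
// above only inserts an intermediate f32 step when converting between f64 and
// f16/bf16; every other element-width pair is a single widening or narrowing
// conversion. A minimal, standalone sketch of that decision in terms of raw
// element widths (the helper name and the use of plain bit counts are
// illustrative assumptions; compiles as C++17):
constexpr bool needsIntermediateF32(unsigned DstEltBits, unsigned SrcEltBits,
                                    bool IsExtend) {
  // RVV converts between adjacent FP widths only, so 16-bit <-> 64-bit
  // conversions take two hops, with round-to-odd on the narrowing hop to
  // avoid double rounding.
  if (IsExtend)
    return DstEltBits == 64 && SrcEltBits == 16; // f16/bf16 -> f64
  return DstEltBits == 16 && SrcEltBits == 64;   // f64 -> f16/bf16
}
static_assert(!needsIntermediateF32(64, 32, /*IsExtend=*/true));  // one vfwcvt
static_assert(needsIntermediateF32(64, 16, /*IsExtend=*/true));   // two steps
static_assert(needsIntermediateF32(16, 64, /*IsExtend=*/false));  // vfncvt.rod + vfncvt
static_assert(!needsIntermediateF32(32, 64, /*IsExtend=*/false)); // one vfncvt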
8267
8268// Given a scalable vector type and an index into it, returns the type for the
8269// smallest subvector that the index fits in. This can be used to reduce LMUL
8270// for operations like vslidedown.
8271//
8272// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8273static std::optional<MVT>
8274getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8275 const RISCVSubtarget &Subtarget) {
8276 assert(VecVT.isScalableVector());
8277 const unsigned EltSize = VecVT.getScalarSizeInBits();
8278 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8279 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8280 MVT SmallerVT;
8281 if (MaxIdx < MinVLMAX)
8282 SmallerVT = getLMUL1VT(VecVT);
8283 else if (MaxIdx < MinVLMAX * 2)
8284 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8285 else if (MaxIdx < MinVLMAX * 4)
8286 SmallerVT = getLMUL1VT(VecVT)
8287 .getDoubleNumVectorElementsVT()
8288 .getDoubleNumVectorElementsVT();
8289 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8290 return std::nullopt;
8291 return SmallerVT;
8292}
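//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] The same LMUL
// selection as getSmallestVTForIndex, restated with plain integers: given the
// guaranteed minimum VLEN, the element width and the largest index touched,
// pick the smallest register-group size (LMUL 1, 2 or 4) known to contain the
// index, or 0 when no shrinking applies. The helper name is invented.
constexpr unsigned smallestCoveringLMUL(unsigned MinVLenBits, unsigned EltBits,
                                        unsigned MaxIdx) {
  const unsigned MinVLMAX = MinVLenBits / EltBits; // elements per LMUL1 register
  if (MaxIdx < MinVLMAX)
    return 1;
  if (MaxIdx < MinVLMAX * 2)
    return 2;
  if (MaxIdx < MinVLMAX * 4)
    return 4;
  return 0;
}
// With Zvl128b and i32 elements: index 3 fits in LMUL1 (the nxv4i32 example
// above), index 5 needs LMUL2, index 13 needs LMUL4, index 16 cannot shrink.
static_assert(smallestCoveringLMUL(128, 32, 3) == 1);
static_assert(smallestCoveringLMUL(128, 32, 5) == 2);
static_assert(smallestCoveringLMUL(128, 32, 13) == 4);
static_assert(smallestCoveringLMUL(128, 32, 16) == 0);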
8293
8294// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8295// first position of a vector, and that vector is slid up to the insert index.
8296// By limiting the active vector length to index+1 and merging with the
8297// original vector (with an undisturbed tail policy for elements >= VL), we
8298// achieve the desired result of leaving all elements untouched except the one
8299// at VL-1, which is replaced with the desired value.
8300SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8301 SelectionDAG &DAG) const {
8302 SDLoc DL(Op);
8303 MVT VecVT = Op.getSimpleValueType();
8304 SDValue Vec = Op.getOperand(0);
8305 SDValue Val = Op.getOperand(1);
8306 SDValue Idx = Op.getOperand(2);
8307
8308 if (VecVT.getVectorElementType() == MVT::i1) {
8309 // FIXME: For now we just promote to an i8 vector and insert into that,
8310 // but this is probably not optimal.
8311 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8312 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8313 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8314 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8315 }
8316
8317 MVT ContainerVT = VecVT;
8318 // If the operand is a fixed-length vector, convert to a scalable one.
8319 if (VecVT.isFixedLengthVector()) {
8320 ContainerVT = getContainerForFixedLengthVector(VecVT);
8321 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8322 }
8323
8324 // If we know the index we're going to insert at, we can shrink Vec so that
8325 // we're performing the scalar inserts and slideup on a smaller LMUL.
8326 MVT OrigContainerVT = ContainerVT;
8327 SDValue OrigVec = Vec;
8328 SDValue AlignedIdx;
8329 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8330 const unsigned OrigIdx = IdxC->getZExtValue();
8331 // Do we know an upper bound on LMUL?
8332 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8333 DL, DAG, Subtarget)) {
8334 ContainerVT = *ShrunkVT;
8335 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8336 }
8337
8338 // If we're compiling for an exact VLEN value, we can always perform
8339 // the insert in m1 as we can determine the register corresponding to
8340 // the index in the register group.
8341 const MVT M1VT = getLMUL1VT(ContainerVT);
8342 if (auto VLEN = Subtarget.getRealVLen();
8343 VLEN && ContainerVT.bitsGT(M1VT)) {
8344 EVT ElemVT = VecVT.getVectorElementType();
8345 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8346 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8347 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8348 unsigned ExtractIdx =
8349 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8350 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8351 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8352 ContainerVT = M1VT;
8353 }
8354
8355 if (AlignedIdx)
8356 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8357 AlignedIdx);
8358 }
8359
8360 MVT XLenVT = Subtarget.getXLenVT();
8361
8362 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8363 // Even i64-element vectors on RV32 can be lowered without scalar
8364 // legalization if the most-significant 32 bits of the value are not affected
8365 // by the sign-extension of the lower 32 bits.
8366 // TODO: We could also catch sign extensions of a 32-bit value.
8367 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8368 const auto *CVal = cast<ConstantSDNode>(Val);
8369 if (isInt<32>(CVal->getSExtValue())) {
8370 IsLegalInsert = true;
8371 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
8372 }
8373 }
8374
8375 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8376
8377 SDValue ValInVec;
8378
8379 if (IsLegalInsert) {
8380 unsigned Opc =
8381 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8382 if (isNullConstant(Idx)) {
8383 if (!VecVT.isFloatingPoint())
8384 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8385 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8386
8387 if (AlignedIdx)
8388 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8389 Vec, AlignedIdx);
8390 if (!VecVT.isFixedLengthVector())
8391 return Vec;
8392 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8393 }
8394 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8395 } else {
8396 // On RV32, i64-element vectors must be specially handled to place the
8397 // value at element 0, by using two vslide1down instructions in sequence on
8398 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8399 // this.
8400 SDValue ValLo, ValHi;
8401 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8402 MVT I32ContainerVT =
8403 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8404 SDValue I32Mask =
8405 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8406 // Limit the active VL to two.
8407 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8408 // If the Idx is 0 we can insert directly into the vector.
8409 if (isNullConstant(Idx)) {
8410 // First slide in the lo value, then the hi in above it. We use slide1down
8411 // to avoid the register group overlap constraint of vslide1up.
8412 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8413 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8414 // If the source vector is undef don't pass along the tail elements from
8415 // the previous slide1down.
8416 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8417 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8418 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8419 // Bitcast back to the right container type.
8420 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8421
8422 if (AlignedIdx)
8423 ValInVec =
8424 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8425 ValInVec, AlignedIdx);
8426 if (!VecVT.isFixedLengthVector())
8427 return ValInVec;
8428 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8429 }
8430
8431 // First slide in the lo value, then the hi in above it. We use slide1down
8432 // to avoid the register group overlap constraint of vslide1up.
8433 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8434 DAG.getUNDEF(I32ContainerVT),
8435 DAG.getUNDEF(I32ContainerVT), ValLo,
8436 I32Mask, InsertI64VL);
8437 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8438 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8439 I32Mask, InsertI64VL);
8440 // Bitcast back to the right container type.
8441 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8442 }
8443
8444 // Now that the value is in a vector, slide it into position.
8445 SDValue InsertVL =
8446 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8447
8448 // Use tail agnostic policy if Idx is the last index of Vec.
8449 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8450 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8451 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8452 Policy = RISCVII::TAIL_AGNOSTIC;
8453 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8454 Idx, Mask, InsertVL, Policy);
8455
8456 if (AlignedIdx)
8457 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8458 Slideup, AlignedIdx);
8459 if (!VecVT.isFixedLengthVector())
8460 return Slideup;
8461 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8462}
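//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] The exact-VLEN
// fast path above rewrites an insert at OrigIdx as an LMUL1 operation: it
// selects the vector register of the group that holds the element and inserts
// at the remainder index inside it. The index split is plain arithmetic
// (struct and helper names are invented for this sketch):
struct M1Split {
  unsigned SubRegIdx; // which LMUL1 register of the group
  unsigned RemIdx;    // element index within that register
};
constexpr M1Split splitIndexForExactVLEN(unsigned OrigIdx, unsigned VLenBits,
                                         unsigned EltBits) {
  const unsigned ElemsPerVReg = VLenBits / EltBits;
  return {OrigIdx / ElemsPerVReg, OrigIdx % ElemsPerVReg};
}
// VLEN=128, i32 elements: index 9 lives in the third register, at element 1.
static_assert(splitIndexForExactVLEN(9, 128, 32).SubRegIdx == 2);
static_assert(splitIndexForExactVLEN(9, 128, 32).RemIdx == 1);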
8463
8464// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8465// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8466// types this is done using VMV_X_S to allow us to glean information about the
8467// sign bits of the result.
8468SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8469 SelectionDAG &DAG) const {
8470 SDLoc DL(Op);
8471 SDValue Idx = Op.getOperand(1);
8472 SDValue Vec = Op.getOperand(0);
8473 EVT EltVT = Op.getValueType();
8474 MVT VecVT = Vec.getSimpleValueType();
8475 MVT XLenVT = Subtarget.getXLenVT();
8476
8477 if (VecVT.getVectorElementType() == MVT::i1) {
8478 // Use vfirst.m to extract the first bit.
8479 if (isNullConstant(Idx)) {
8480 MVT ContainerVT = VecVT;
8481 if (VecVT.isFixedLengthVector()) {
8482 ContainerVT = getContainerForFixedLengthVector(VecVT);
8483 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8484 }
8485 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8486 SDValue Vfirst =
8487 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8488 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8489 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8490 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8491 }
8492 if (VecVT.isFixedLengthVector()) {
8493 unsigned NumElts = VecVT.getVectorNumElements();
8494 if (NumElts >= 8) {
8495 MVT WideEltVT;
8496 unsigned WidenVecLen;
8497 SDValue ExtractElementIdx;
8498 SDValue ExtractBitIdx;
8499 unsigned MaxEEW = Subtarget.getELen();
8500 MVT LargestEltVT = MVT::getIntegerVT(
8501 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8502 if (NumElts <= LargestEltVT.getSizeInBits()) {
8503 assert(isPowerOf2_32(NumElts) &&
8504 "the number of elements should be power of 2");
8505 WideEltVT = MVT::getIntegerVT(NumElts);
8506 WidenVecLen = 1;
8507 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8508 ExtractBitIdx = Idx;
8509 } else {
8510 WideEltVT = LargestEltVT;
8511 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8512 // extract element index = index / element width
8513 ExtractElementIdx = DAG.getNode(
8514 ISD::SRL, DL, XLenVT, Idx,
8515 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8516 // mask bit index = index % element width
8517 ExtractBitIdx = DAG.getNode(
8518 ISD::AND, DL, XLenVT, Idx,
8519 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8520 }
8521 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8522 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8523 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8524 Vec, ExtractElementIdx);
8525 // Extract the bit from GPR.
8526 SDValue ShiftRight =
8527 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8528 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8529 DAG.getConstant(1, DL, XLenVT));
8530 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8531 }
8532 }
8533 // Otherwise, promote to an i8 vector and extract from that.
8534 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8535 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8536 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8537 }
8538
8539 // If this is a fixed vector, we need to convert it to a scalable vector.
8540 MVT ContainerVT = VecVT;
8541 if (VecVT.isFixedLengthVector()) {
8542 ContainerVT = getContainerForFixedLengthVector(VecVT);
8543 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8544 }
8545
8546 // If we're compiling for an exact VLEN value and we have a known
8547 // constant index, we can always perform the extract in m1 (or
8548 // smaller) as we can determine the register corresponding to
8549 // the index in the register group.
8550 const auto VLen = Subtarget.getRealVLen();
8551 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8552 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8553 MVT M1VT = getLMUL1VT(ContainerVT);
8554 unsigned OrigIdx = IdxC->getZExtValue();
8555 EVT ElemVT = VecVT.getVectorElementType();
8556 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8557 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8558 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8559 unsigned ExtractIdx =
8560 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8561 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8562 DAG.getVectorIdxConstant(ExtractIdx, DL));
8563 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8564 ContainerVT = M1VT;
8565 }
8566
8567 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8568 // contains our index.
8569 std::optional<uint64_t> MaxIdx;
8570 if (VecVT.isFixedLengthVector())
8571 MaxIdx = VecVT.getVectorNumElements() - 1;
8572 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8573 MaxIdx = IdxC->getZExtValue();
8574 if (MaxIdx) {
8575 if (auto SmallerVT =
8576 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8577 ContainerVT = *SmallerVT;
8578 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8579 DAG.getConstant(0, DL, XLenVT));
8580 }
8581 }
8582
8583 // If after narrowing, the required slide is still greater than LMUL2,
8584 // fallback to generic expansion and go through the stack. This is done
8585 // for a subtle reason: extracting *all* elements out of a vector is
8586 // widely expected to be linear in vector size, but because vslidedown
8587 // is linear in LMUL, performing N extracts using vslidedown becomes
8588 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8589 // seems to have the same problem (the store is linear in LMUL), but the
8590 // generic expansion *memoizes* the store, and thus for many extracts of
8591 // the same vector we end up with one store and a bunch of loads.
8592 // TODO: We don't have the same code for insert_vector_elt because we
8593 // have BUILD_VECTOR and handle the degenerate case there. Should we
8594 // consider adding an inverse BUILD_VECTOR node?
8595 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8596 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8597 return SDValue();
8598
8599 // If the index is 0, the vector is already in the right position.
8600 if (!isNullConstant(Idx)) {
8601 // Use a VL of 1 to avoid processing more elements than we need.
8602 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8603 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8604 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8605 }
8606
8607 if (!EltVT.isInteger()) {
8608 // Floating-point extracts are handled in TableGen.
8609 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8610 DAG.getVectorIdxConstant(0, DL));
8611 }
8612
8613 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8614 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8615}
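//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] For a fixed i1
// vector the code above bitcasts the mask to wider integer elements, extracts
// one element into a GPR, and isolates the requested bit with a shift and an
// AND. The same index arithmetic on a mask packed into one 64-bit word (the
// DAG uses an SRL by log2(EltBits) for the division and an AND for the
// modulo; the helper name is invented):
constexpr unsigned extractMaskBit(unsigned long long PackedMask, unsigned Idx,
                                  unsigned WideEltBits) {
  const unsigned WordIdx = Idx / WideEltBits;      // extract-element index
  const unsigned BitIdx = Idx & (WideEltBits - 1); // bit index inside it
  const unsigned long long Word = PackedMask >> (WordIdx * WideEltBits);
  return (Word >> BitIdx) & 1;
}
// A 16-element mask 0x0201 viewed as two i8 chunks: elements 0 and 9 are set.
static_assert(extractMaskBit(0x0201, 0, 8) == 1);
static_assert(extractMaskBit(0x0201, 1, 8) == 0);
static_assert(extractMaskBit(0x0201, 9, 8) == 1);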
8616
8617// Some RVV intrinsics may claim that they want an integer operand to be
8618 // promoted or expanded.
8619 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8620 const RISCVSubtarget &Subtarget) {
8621 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8622 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8623 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8624 "Unexpected opcode");
8625
8626 if (!Subtarget.hasVInstructions())
8627 return SDValue();
8628
8629 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8630 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8631 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8632
8633 SDLoc DL(Op);
8634
8635 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8636 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8637 if (!II || !II->hasScalarOperand())
8638 return SDValue();
8639
8640 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8641 assert(SplatOp < Op.getNumOperands());
8642
8643 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8644 SDValue &ScalarOp = Operands[SplatOp];
8645 MVT OpVT = ScalarOp.getSimpleValueType();
8646 MVT XLenVT = Subtarget.getXLenVT();
8647
8648 // If this isn't a scalar, or its type is XLenVT, we're done.
8649 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8650 return SDValue();
8651
8652 // Simplest case is that the operand needs to be promoted to XLenVT.
8653 if (OpVT.bitsLT(XLenVT)) {
8654 // If the operand is a constant, sign extend to increase our chances
8655 // of being able to use a .vi instruction. ANY_EXTEND would become a
8656 // zero extend and the simm5 check in isel would fail.
8657 // FIXME: Should we ignore the upper bits in isel instead?
8658 unsigned ExtOpc =
8659 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8660 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8661 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8662 }
8663
8664 // Use the previous operand to get the vXi64 VT. The result might be a mask
8665 // VT for compares. Using the previous operand assumes that the previous
8666 // operand will never have a smaller element size than a scalar operand and
8667 // that a widening operation never uses SEW=64.
8668 // NOTE: If this fails the below assert, we can probably just find the
8669 // element count from any operand or result and use it to construct the VT.
8670 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8671 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8672
8673 // The more complex case is when the scalar is larger than XLenVT.
8674 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8675 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8676
8677 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8678 // instruction to sign-extend since SEW>XLEN.
8679 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8680 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8681 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8682 }
8683
8684 switch (IntNo) {
8685 case Intrinsic::riscv_vslide1up:
8686 case Intrinsic::riscv_vslide1down:
8687 case Intrinsic::riscv_vslide1up_mask:
8688 case Intrinsic::riscv_vslide1down_mask: {
8689 // We need to special case these when the scalar is larger than XLen.
8690 unsigned NumOps = Op.getNumOperands();
8691 bool IsMasked = NumOps == 7;
8692
8693 // Convert the vector source to the equivalent nxvXi32 vector.
8694 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8695 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8696 SDValue ScalarLo, ScalarHi;
8697 std::tie(ScalarLo, ScalarHi) =
8698 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8699
8700 // Double the VL since we halved SEW.
8701 SDValue AVL = getVLOperand(Op);
8702 SDValue I32VL;
8703
8704 // Optimize for constant AVL
8705 if (isa<ConstantSDNode>(AVL)) {
8706 const auto [MinVLMAX, MaxVLMAX] =
8707 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8708
8709 uint64_t AVLInt = AVL->getAsZExtVal();
8710 if (AVLInt <= MinVLMAX) {
8711 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8712 } else if (AVLInt >= 2 * MaxVLMAX) {
8713 // Just set vl to VLMAX in this situation
8714 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
8715 } else {
8716 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8717 // is related to the hardware implementation.
8718 // So let the following code handle it.
8719 }
8720 }
8721 if (!I32VL) {
8722 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8723 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8724 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8725 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8726 SDValue SETVL =
8727 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8728 // Use a vsetvli instruction to get the actually-used length, which depends
8729 // on the hardware implementation.
8730 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8731 SEW, LMUL);
8732 I32VL =
8733 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8734 }
8735
8736 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8737
8738 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8739 // instructions.
8740 SDValue Passthru;
8741 if (IsMasked)
8742 Passthru = DAG.getUNDEF(I32VT);
8743 else
8744 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8745
8746 if (IntNo == Intrinsic::riscv_vslide1up ||
8747 IntNo == Intrinsic::riscv_vslide1up_mask) {
8748 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8749 ScalarHi, I32Mask, I32VL);
8750 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8751 ScalarLo, I32Mask, I32VL);
8752 } else {
8753 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8754 ScalarLo, I32Mask, I32VL);
8755 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8756 ScalarHi, I32Mask, I32VL);
8757 }
8758
8759 // Convert back to nxvXi64.
8760 Vec = DAG.getBitcast(VT, Vec);
8761
8762 if (!IsMasked)
8763 return Vec;
8764 // Apply mask after the operation.
8765 SDValue Mask = Operands[NumOps - 3];
8766 SDValue MaskedOff = Operands[1];
8767 // Assume Policy operand is the last operand.
8768 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8769 // We don't need to select maskedoff if it's undef.
8770 if (MaskedOff.isUndef())
8771 return Vec;
8772 // TAMU
8773 if (Policy == RISCVII::TAIL_AGNOSTIC)
8774 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8775 DAG.getUNDEF(VT), AVL);
8776 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8777 // It's fine because vmerge does not care about the mask policy.
8778 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8779 MaskedOff, AVL);
8780 }
8781 }
8782
8783 // We need to convert the scalar to a splat vector.
8784 SDValue VL = getVLOperand(Op);
8785 assert(VL.getValueType() == XLenVT);
8786 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8787 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8788}
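//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] On RV32 an i64
// scalar operand is handled one of two ways above: if it is already a
// sign-extended 32-bit value it is simply truncated (the instruction
// re-extends it because SEW > XLEN); otherwise it is split into lo/hi halves
// and fed through a pair of SEW=32 slide1up/slide1down steps (hi first for
// slide1up, lo first for slide1down). The two checks in plain integer terms
// (helper names are invented):
#include <cstdint>
#include <utility>
constexpr bool fitsInSExt32(int64_t V) {
  return V == static_cast<int64_t>(static_cast<int32_t>(V));
}
constexpr std::pair<uint32_t, uint32_t> splitI64(uint64_t V) {
  return {static_cast<uint32_t>(V),        // lo 32 bits
          static_cast<uint32_t>(V >> 32)}; // hi 32 bits
}
static_assert(fitsInSExt32(-1) && fitsInSExt32(0x7fffffff));
static_assert(!fitsInSExt32(0x100000000LL));
static_assert(splitI64(0x1122334455667788ULL).first == 0x55667788u);
static_assert(splitI64(0x1122334455667788ULL).second == 0x11223344u);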
8789
8790// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8791// scalable vector llvm.get.vector.length for now.
8792//
8793// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8794// (vscale * VF). The vscale and VF are independent of element width. We use
8795// SEW=8 for the vsetvli because it is the only element width that supports all
8796 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8797 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8798// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8799// SEW and LMUL are better for the surrounding vector instructions.
8800 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8801 const RISCVSubtarget &Subtarget) {
8802 MVT XLenVT = Subtarget.getXLenVT();
8803
8804 // The smallest LMUL is only valid for the smallest element width.
8805 const unsigned ElementWidth = 8;
8806
8807 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8808 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8809 // We don't support VF==1 with ELEN==32.
8810 [[maybe_unused]] unsigned MinVF =
8811 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8812
8813 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8814 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8815 "Unexpected VF");
8816
8817 bool Fractional = VF < LMul1VF;
8818 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8819 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8820 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8821
8822 SDLoc DL(N);
8823
8824 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8825 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8826
8827 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8828
8829 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8830 SDValue Res =
8831 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8832 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8833}
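//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] A standalone
// model of the LMUL/SEW choice above: with SEW fixed at 8, the LMUL giving
// VLMAX = vscale * VF is VF / (RVVBitsPerBlock / 8), fractional when VF is
// smaller than that ratio. The vtype field encodings follow the V spec (vsew
// is log2(SEW) - 3; fractional vlmul values occupy 5..7). All names below are
// invented for this sketch.
struct VTypeFields {
  bool Fractional;
  unsigned LMulVal, VLMul, VSew;
};
constexpr unsigned kRVVBitsPerBlock = 64;
constexpr unsigned log2u(unsigned V) { return V <= 1 ? 0 : 1 + log2u(V / 2); }
constexpr VTypeFields vtypeForGetVectorLength(unsigned VF) {
  const unsigned ElementWidth = 8;                          // smallest SEW
  const unsigned LMul1VF = kRVVBitsPerBlock / ElementWidth; // VF at LMUL1
  const bool Fractional = VF < LMul1VF;
  const unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
  const unsigned VLMul = Fractional ? 8 - log2u(LMulVal) : log2u(LMulVal);
  return {Fractional, LMulVal, VLMul, /*VSew=*/log2u(ElementWidth) - 3};
}
// VF=2 -> LMUL=1/4 (vlmul=0b110); VF=16 -> LMUL=2 (vlmul=0b001); SEW stays 8.
static_assert(vtypeForGetVectorLength(2).Fractional &&
              vtypeForGetVectorLength(2).VLMul == 6);
static_assert(!vtypeForGetVectorLength(16).Fractional &&
              vtypeForGetVectorLength(16).VLMul == 1);
static_assert(vtypeForGetVectorLength(16).VSew == 0);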
8834
8835 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8836 const RISCVSubtarget &Subtarget) {
8837 SDValue Op0 = N->getOperand(1);
8838 MVT OpVT = Op0.getSimpleValueType();
8839 MVT ContainerVT = OpVT;
8840 if (OpVT.isFixedLengthVector()) {
8841 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8842 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8843 }
8844 MVT XLenVT = Subtarget.getXLenVT();
8845 SDLoc DL(N);
8846 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8847 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8848 if (isOneConstant(N->getOperand(2)))
8849 return Res;
8850
8851 // Convert -1 to VL.
8852 SDValue Setcc =
8853 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8854 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8855 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8856}
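//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] A scalar model
// of the lowering above: vfirst.m yields the index of the first set mask
// element, or -1 if none is set. For the non-poison form of
// llvm.experimental.cttz.elts the -1 case is folded to the element count,
// which is what the final setcc+select pair implements. Helper names are
// invented.
constexpr long long vfirstModel(unsigned long long Mask, unsigned NumElts) {
  for (unsigned I = 0; I < NumElts; ++I)
    if ((Mask >> I) & 1)
      return I;
  return -1;
}
constexpr unsigned long long cttzEltsModel(unsigned long long Mask,
                                           unsigned NumElts) {
  const long long First = vfirstModel(Mask, NumElts);
  return First < 0 ? NumElts : static_cast<unsigned long long>(First);
}
static_assert(cttzEltsModel(0b0100, 8) == 2); // first set element at index 2
static_assert(cttzEltsModel(0b0000, 8) == 8); // no set element -> VL (8)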
8857
8858 static inline void promoteVCIXScalar(const SDValue &Op,
8859 SmallVectorImpl<SDValue> &Operands,
8860 SelectionDAG &DAG) {
8861 const RISCVSubtarget &Subtarget =
8862 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8863
8864 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8865 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8866 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8867 SDLoc DL(Op);
8868
8869 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8870 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8871 if (!II || !II->hasScalarOperand())
8872 return;
8873
8874 unsigned SplatOp = II->ScalarOperand + 1;
8875 assert(SplatOp < Op.getNumOperands());
8876
8877 SDValue &ScalarOp = Operands[SplatOp];
8878 MVT OpVT = ScalarOp.getSimpleValueType();
8879 MVT XLenVT = Subtarget.getXLenVT();
8880
8881 // The code below is partially copied from lowerVectorIntrinsicScalars.
8882 // If this isn't a scalar, or its type is XLenVT, we're done.
8883 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8884 return;
8885
8886 // Manually emit promote operation for scalar operation.
8887 if (OpVT.bitsLT(XLenVT)) {
8888 unsigned ExtOpc =
8889 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8890 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8891 }
8892
8893 return;
8894}
8895
8896 static void processVCIXOperands(SDValue &OrigOp,
8897 SmallVectorImpl<SDValue> &Operands,
8898 SelectionDAG &DAG) {
8899 promoteVCIXScalar(OrigOp, Operands, DAG);
8900 const RISCVSubtarget &Subtarget =
8901 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8902 for (SDValue &V : Operands) {
8903 EVT ValType = V.getValueType();
8904 if (ValType.isVector() && ValType.isFloatingPoint()) {
8905 MVT InterimIVT =
8906 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8907 ValType.getVectorElementCount());
8908 V = DAG.getBitcast(InterimIVT, V);
8909 }
8910 if (ValType.isFixedLengthVector()) {
8911 MVT OpContainerVT = getContainerForFixedLengthVector(
8912 DAG, V.getSimpleValueType(), Subtarget);
8913 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8914 }
8915 }
8916}
8917
8918// LMUL * VLEN should be greater than or equal to EGS * SEW
8919static inline bool isValidEGW(int EGS, EVT VT,
8920 const RISCVSubtarget &Subtarget) {
8921 return (Subtarget.getRealMinVLen() *
8922 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8923 EGS * VT.getScalarSizeInBits();
8924}
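//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] The vector
// crypto element groups require LMUL * VLEN >= EGS * SEW. For a scalable
// type, LMUL is the type's known-minimum size divided by RVVBitsPerBlock,
// which gives the same inequality isValidEGW checks, restated on plain
// integers (the helper name is invented):
constexpr bool validEGW(unsigned EGS, unsigned MinVLenBits,
                        unsigned VTKnownMinBits, unsigned SEWBits) {
  constexpr unsigned RVVBitsPerBlock = 64;
  return (MinVLenBits * VTKnownMinBits) / RVVBitsPerBlock >= EGS * SEWBits;
}
// Zvl128b with nxv4i32 (known-min 128 bits, i.e. LMUL2): 2*128 >= 4*32 holds.
static_assert(validEGW(/*EGS=*/4, /*VLEN>=*/128, /*VT bits=*/128, /*SEW=*/32));
// Zvl64b with nxv1i32 (known-min 32 bits, i.e. LMUL1/2): 32 < 4*32 fails.
static_assert(!validEGW(/*EGS=*/4, /*VLEN>=*/64, /*VT bits=*/32, /*SEW=*/32));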
8925
8926SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8927 SelectionDAG &DAG) const {
8928 unsigned IntNo = Op.getConstantOperandVal(0);
8929 SDLoc DL(Op);
8930 MVT XLenVT = Subtarget.getXLenVT();
8931
8932 switch (IntNo) {
8933 default:
8934 break; // Don't custom lower most intrinsics.
8935 case Intrinsic::thread_pointer: {
8936 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8937 return DAG.getRegister(RISCV::X4, PtrVT);
8938 }
8939 case Intrinsic::riscv_orc_b:
8940 case Intrinsic::riscv_brev8:
8941 case Intrinsic::riscv_sha256sig0:
8942 case Intrinsic::riscv_sha256sig1:
8943 case Intrinsic::riscv_sha256sum0:
8944 case Intrinsic::riscv_sha256sum1:
8945 case Intrinsic::riscv_sm3p0:
8946 case Intrinsic::riscv_sm3p1: {
8947 unsigned Opc;
8948 switch (IntNo) {
8949 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8950 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8951 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8952 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8953 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8954 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8955 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8956 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8957 }
8958
8959 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8960 }
8961 case Intrinsic::riscv_sm4ks:
8962 case Intrinsic::riscv_sm4ed: {
8963 unsigned Opc =
8964 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8965
8966 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8967 Op.getOperand(3));
8968 }
8969 case Intrinsic::riscv_zip:
8970 case Intrinsic::riscv_unzip: {
8971 unsigned Opc =
8972 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8973 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8974 }
8975 case Intrinsic::riscv_mopr:
8976 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8977 Op.getOperand(2));
8978
8979 case Intrinsic::riscv_moprr: {
8980 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8981 Op.getOperand(2), Op.getOperand(3));
8982 }
8983 case Intrinsic::riscv_clmul:
8984 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8985 Op.getOperand(2));
8986 case Intrinsic::riscv_clmulh:
8987 case Intrinsic::riscv_clmulr: {
8988 unsigned Opc =
8989 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8990 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8991 }
8992 case Intrinsic::experimental_get_vector_length:
8993 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8994 case Intrinsic::experimental_cttz_elts:
8995 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
8996 case Intrinsic::riscv_vmv_x_s: {
8997 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8998 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8999 }
9000 case Intrinsic::riscv_vfmv_f_s:
9001 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9002 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9003 case Intrinsic::riscv_vmv_v_x:
9004 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9005 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9006 Subtarget);
9007 case Intrinsic::riscv_vfmv_v_f:
9008 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9009 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9010 case Intrinsic::riscv_vmv_s_x: {
9011 SDValue Scalar = Op.getOperand(2);
9012
9013 if (Scalar.getValueType().bitsLE(XLenVT)) {
9014 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9015 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9016 Op.getOperand(1), Scalar, Op.getOperand(3));
9017 }
9018
9019 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9020
9021 // This is an i64 value that lives in two scalar registers. We have to
9022 // insert this in a convoluted way. First we build vXi64 splat containing
9023 // the two values that we assemble using some bit math. Next we'll use
9024 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9025 // to merge element 0 from our splat into the source vector.
9026 // FIXME: This is probably not the best way to do this, but it is
9027 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9028 // point.
9029 // sw lo, (a0)
9030 // sw hi, 4(a0)
9031 // vlse vX, (a0)
9032 //
9033 // vid.v vVid
9034 // vmseq.vx mMask, vVid, 0
9035 // vmerge.vvm vDest, vSrc, vVal, mMask
9036 MVT VT = Op.getSimpleValueType();
9037 SDValue Vec = Op.getOperand(1);
9038 SDValue VL = getVLOperand(Op);
9039
9040 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9041 if (Op.getOperand(1).isUndef())
9042 return SplattedVal;
9043 SDValue SplattedIdx =
9044 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9045 DAG.getConstant(0, DL, MVT::i32), VL);
9046
9047 MVT MaskVT = getMaskTypeFor(VT);
9048 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9049 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9050 SDValue SelectCond =
9051 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9052 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9053 DAG.getUNDEF(MaskVT), Mask, VL});
9054 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9055 Vec, DAG.getUNDEF(VT), VL);
9056 }
9057 case Intrinsic::riscv_vfmv_s_f:
9058 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9059 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9060 // EGS * EEW >= 128 bits
9061 case Intrinsic::riscv_vaesdf_vv:
9062 case Intrinsic::riscv_vaesdf_vs:
9063 case Intrinsic::riscv_vaesdm_vv:
9064 case Intrinsic::riscv_vaesdm_vs:
9065 case Intrinsic::riscv_vaesef_vv:
9066 case Intrinsic::riscv_vaesef_vs:
9067 case Intrinsic::riscv_vaesem_vv:
9068 case Intrinsic::riscv_vaesem_vs:
9069 case Intrinsic::riscv_vaeskf1:
9070 case Intrinsic::riscv_vaeskf2:
9071 case Intrinsic::riscv_vaesz_vs:
9072 case Intrinsic::riscv_vsm4k:
9073 case Intrinsic::riscv_vsm4r_vv:
9074 case Intrinsic::riscv_vsm4r_vs: {
9075 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9076 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9077 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9078 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9079 return Op;
9080 }
9081 // EGS * EEW >= 256 bits
9082 case Intrinsic::riscv_vsm3c:
9083 case Intrinsic::riscv_vsm3me: {
9084 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9085 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9086 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9087 return Op;
9088 }
9089 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9090 case Intrinsic::riscv_vsha2ch:
9091 case Intrinsic::riscv_vsha2cl:
9092 case Intrinsic::riscv_vsha2ms: {
9093 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9094 !Subtarget.hasStdExtZvknhb())
9095 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9096 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9097 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9098 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9099 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9100 return Op;
9101 }
9102 case Intrinsic::riscv_sf_vc_v_x:
9103 case Intrinsic::riscv_sf_vc_v_i:
9104 case Intrinsic::riscv_sf_vc_v_xv:
9105 case Intrinsic::riscv_sf_vc_v_iv:
9106 case Intrinsic::riscv_sf_vc_v_vv:
9107 case Intrinsic::riscv_sf_vc_v_fv:
9108 case Intrinsic::riscv_sf_vc_v_xvv:
9109 case Intrinsic::riscv_sf_vc_v_ivv:
9110 case Intrinsic::riscv_sf_vc_v_vvv:
9111 case Intrinsic::riscv_sf_vc_v_fvv:
9112 case Intrinsic::riscv_sf_vc_v_xvw:
9113 case Intrinsic::riscv_sf_vc_v_ivw:
9114 case Intrinsic::riscv_sf_vc_v_vvw:
9115 case Intrinsic::riscv_sf_vc_v_fvw: {
9116 MVT VT = Op.getSimpleValueType();
9117
9118 SmallVector<SDValue> Operands{Op->op_values()};
9119 processVCIXOperands(Op, Operands, DAG);
9120
9121 MVT RetVT = VT;
9122 if (VT.isFixedLengthVector())
9123 RetVT = getContainerForFixedLengthVector(RetVT);
9124 else if (VT.isFloatingPoint())
9125 RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
9126 RetVT.getVectorElementCount());
9127
9128 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9129
9130 if (VT.isFixedLengthVector())
9131 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9132 else if (VT.isFloatingPoint())
9133 NewNode = DAG.getBitcast(VT, NewNode);
9134
9135 if (Op == NewNode)
9136 break;
9137
9138 return NewNode;
9139 }
9140 }
9141
9142 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9143}
9144
9145 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9146 unsigned Type) {
9147 SDLoc DL(Op);
9148 SmallVector<SDValue> Operands{Op->op_values()};
9149 Operands.erase(Operands.begin() + 1);
9150
9151 const RISCVSubtarget &Subtarget =
9152 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9153 MVT VT = Op.getSimpleValueType();
9154 MVT RetVT = VT;
9155 MVT FloatVT = VT;
9156
9157 if (VT.isFloatingPoint()) {
9158 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9159 VT.getVectorElementCount());
9160 FloatVT = RetVT;
9161 }
9162 if (VT.isFixedLengthVector())
9163 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9164 Subtarget);
9165
9166 processVCIXOperands(Op, Operands, DAG);
9167
9168 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9169 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9170 SDValue Chain = NewNode.getValue(1);
9171
9172 if (VT.isFixedLengthVector())
9173 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9174 if (VT.isFloatingPoint())
9175 NewNode = DAG.getBitcast(VT, NewNode);
9176
9177 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9178
9179 return NewNode;
9180}
9181
9182 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9183 unsigned Type) {
9184 SmallVector<SDValue> Operands{Op->op_values()};
9185 Operands.erase(Operands.begin() + 1);
9186 processVCIXOperands(Op, Operands, DAG);
9187
9188 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9189}
9190
9191SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9192 SelectionDAG &DAG) const {
9193 unsigned IntNo = Op.getConstantOperandVal(1);
9194 switch (IntNo) {
9195 default:
9196 break;
9197 case Intrinsic::riscv_seg2_load:
9198 case Intrinsic::riscv_seg3_load:
9199 case Intrinsic::riscv_seg4_load:
9200 case Intrinsic::riscv_seg5_load:
9201 case Intrinsic::riscv_seg6_load:
9202 case Intrinsic::riscv_seg7_load:
9203 case Intrinsic::riscv_seg8_load: {
9204 SDLoc DL(Op);
9205 static const Intrinsic::ID VlsegInts[7] = {
9206 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9207 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9208 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9209 Intrinsic::riscv_vlseg8};
9210 unsigned NF = Op->getNumValues() - 1;
9211 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9212 MVT XLenVT = Subtarget.getXLenVT();
9213 MVT VT = Op->getSimpleValueType(0);
9214 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9215
9216 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9217 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9218 auto *Load = cast<MemIntrinsicSDNode>(Op);
9219 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9220 ContainerVTs.push_back(MVT::Other);
9221 SDVTList VTs = DAG.getVTList(ContainerVTs);
9222 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9223 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9224 Ops.push_back(Op.getOperand(2));
9225 Ops.push_back(VL);
9226 SDValue Result =
9227 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9228 Load->getMemoryVT(), Load->getMemOperand());
9229 SmallVector<SDValue, 9> Results;
9230 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9231 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9232 DAG, Subtarget));
9233 Results.push_back(Result.getValue(NF));
9234 return DAG.getMergeValues(Results, DL);
9235 }
9236 case Intrinsic::riscv_sf_vc_v_x_se:
9237 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9238 case Intrinsic::riscv_sf_vc_v_i_se:
9239 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9240 case Intrinsic::riscv_sf_vc_v_xv_se:
9241 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9242 case Intrinsic::riscv_sf_vc_v_iv_se:
9243 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9244 case Intrinsic::riscv_sf_vc_v_vv_se:
9245 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9246 case Intrinsic::riscv_sf_vc_v_fv_se:
9247 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9248 case Intrinsic::riscv_sf_vc_v_xvv_se:
9249 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9250 case Intrinsic::riscv_sf_vc_v_ivv_se:
9251 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9252 case Intrinsic::riscv_sf_vc_v_vvv_se:
9253 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9254 case Intrinsic::riscv_sf_vc_v_fvv_se:
9255 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9256 case Intrinsic::riscv_sf_vc_v_xvw_se:
9257 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9258 case Intrinsic::riscv_sf_vc_v_ivw_se:
9259 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9260 case Intrinsic::riscv_sf_vc_v_vvw_se:
9261 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9262 case Intrinsic::riscv_sf_vc_v_fvw_se:
9263 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9264 }
9265
9266 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9267}
9268
9269SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9270 SelectionDAG &DAG) const {
9271 unsigned IntNo = Op.getConstantOperandVal(1);
9272 switch (IntNo) {
9273 default:
9274 break;
9275 case Intrinsic::riscv_seg2_store:
9276 case Intrinsic::riscv_seg3_store:
9277 case Intrinsic::riscv_seg4_store:
9278 case Intrinsic::riscv_seg5_store:
9279 case Intrinsic::riscv_seg6_store:
9280 case Intrinsic::riscv_seg7_store:
9281 case Intrinsic::riscv_seg8_store: {
9282 SDLoc DL(Op);
9283 static const Intrinsic::ID VssegInts[] = {
9284 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9285 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9286 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9287 Intrinsic::riscv_vsseg8};
9288 // Operands are (chain, int_id, vec*, ptr, vl)
9289 unsigned NF = Op->getNumOperands() - 4;
9290 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9291 MVT XLenVT = Subtarget.getXLenVT();
9292 MVT VT = Op->getOperand(2).getSimpleValueType();
9293 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9294
9295 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9296 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9297 SDValue Ptr = Op->getOperand(NF + 2);
9298
9299 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9300 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9301 for (unsigned i = 0; i < NF; i++)
9302 Ops.push_back(convertToScalableVector(
9303 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9304 Ops.append({Ptr, VL});
9305
9306 return DAG.getMemIntrinsicNode(
9307 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9308 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9309 }
9310 case Intrinsic::riscv_sf_vc_xv_se:
9311 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9312 case Intrinsic::riscv_sf_vc_iv_se:
9313 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9314 case Intrinsic::riscv_sf_vc_vv_se:
9315 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9316 case Intrinsic::riscv_sf_vc_fv_se:
9317 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9318 case Intrinsic::riscv_sf_vc_xvv_se:
9319 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9320 case Intrinsic::riscv_sf_vc_ivv_se:
9321 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9322 case Intrinsic::riscv_sf_vc_vvv_se:
9323 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9324 case Intrinsic::riscv_sf_vc_fvv_se:
9325 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9326 case Intrinsic::riscv_sf_vc_xvw_se:
9327 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9328 case Intrinsic::riscv_sf_vc_ivw_se:
9329 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9330 case Intrinsic::riscv_sf_vc_vvw_se:
9331 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9332 case Intrinsic::riscv_sf_vc_fvw_se:
9333 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9334 }
9335
9336 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9337}
9338
9339static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9340 switch (ISDOpcode) {
9341 default:
9342 llvm_unreachable("Unhandled reduction");
9343 case ISD::VP_REDUCE_ADD:
9344 case ISD::VECREDUCE_ADD:
9345 return RISCVISD::VECREDUCE_ADD_VL;
9346 case ISD::VP_REDUCE_UMAX:
9347 case ISD::VECREDUCE_UMAX:
9348 return RISCVISD::VECREDUCE_UMAX_VL;
9349 case ISD::VP_REDUCE_SMAX:
9350 case ISD::VECREDUCE_SMAX:
9351 return RISCVISD::VECREDUCE_SMAX_VL;
9352 case ISD::VP_REDUCE_UMIN:
9353 case ISD::VECREDUCE_UMIN:
9354 return RISCVISD::VECREDUCE_UMIN_VL;
9355 case ISD::VP_REDUCE_SMIN:
9356 case ISD::VECREDUCE_SMIN:
9357 return RISCVISD::VECREDUCE_SMIN_VL;
9358 case ISD::VP_REDUCE_AND:
9359 case ISD::VECREDUCE_AND:
9360 return RISCVISD::VECREDUCE_AND_VL;
9361 case ISD::VP_REDUCE_OR:
9362 case ISD::VECREDUCE_OR:
9363 return RISCVISD::VECREDUCE_OR_VL;
9364 case ISD::VP_REDUCE_XOR:
9365 case ISD::VECREDUCE_XOR:
9366 return RISCVISD::VECREDUCE_XOR_VL;
9367 case ISD::VP_REDUCE_FADD:
9368 return RISCVISD::VECREDUCE_FADD_VL;
9369 case ISD::VP_REDUCE_SEQ_FADD:
9370 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9371 case ISD::VP_REDUCE_FMAX:
9372 case ISD::VP_REDUCE_FMAXIMUM:
9373 return RISCVISD::VECREDUCE_FMAX_VL;
9374 case ISD::VP_REDUCE_FMIN:
9375 case ISD::VP_REDUCE_FMINIMUM:
9376 return RISCVISD::VECREDUCE_FMIN_VL;
9377 }
9378
9379}
9380
9381SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9382 SelectionDAG &DAG,
9383 bool IsVP) const {
9384 SDLoc DL(Op);
9385 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9386 MVT VecVT = Vec.getSimpleValueType();
9387 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9388 Op.getOpcode() == ISD::VECREDUCE_OR ||
9389 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9390 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9391 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9392 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9393 "Unexpected reduction lowering");
9394
9395 MVT XLenVT = Subtarget.getXLenVT();
9396
9397 MVT ContainerVT = VecVT;
9398 if (VecVT.isFixedLengthVector()) {
9399 ContainerVT = getContainerForFixedLengthVector(VecVT);
9400 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9401 }
9402
9403 SDValue Mask, VL;
9404 if (IsVP) {
9405 Mask = Op.getOperand(2);
9406 VL = Op.getOperand(3);
9407 } else {
9408 std::tie(Mask, VL) =
9409 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9410 }
9411
9412 unsigned BaseOpc;
9413 ISD::CondCode CC;
9414 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9415
9416 switch (Op.getOpcode()) {
9417 default:
9418 llvm_unreachable("Unhandled reduction");
9419 case ISD::VECREDUCE_AND:
9420 case ISD::VP_REDUCE_AND: {
9421 // vcpop ~x == 0
9422 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9423 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9424 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9425 CC = ISD::SETEQ;
9426 BaseOpc = ISD::AND;
9427 break;
9428 }
9429 case ISD::VECREDUCE_OR:
9430 case ISD::VP_REDUCE_OR:
9431 // vcpop x != 0
9432 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9433 CC = ISD::SETNE;
9434 BaseOpc = ISD::OR;
9435 break;
9436 case ISD::VECREDUCE_XOR:
9437 case ISD::VP_REDUCE_XOR: {
9438 // ((vcpop x) & 1) != 0
9439 SDValue One = DAG.getConstant(1, DL, XLenVT);
9440 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9441 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9442 CC = ISD::SETNE;
9443 BaseOpc = ISD::XOR;
9444 break;
9445 }
9446 }
9447
9448 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9449 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9450
9451 if (!IsVP)
9452 return SetCC;
9453
9454 // Now include the start value in the operation.
9455 // Note that we must return the start value when no elements are operated
9456 // upon. The vcpop instructions we've emitted in each case above will return
9457 // 0 for an inactive vector, and so we've already received the neutral value:
9458 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9459 // can simply include the start value.
9460 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9461}
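//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] The three i1
// reductions above all become a vcpop.m plus one scalar compare:
//   reduce.and  m  ->  vcpop(~m) == 0        (no clear bit anywhere)
//   reduce.or   m  ->  vcpop(m)  != 0
//   reduce.xor  m  ->  (vcpop(m) & 1) != 0   (parity)
// Scalar model over a mask packed into a 64-bit word (helper names invented):
constexpr unsigned popcount64(unsigned long long V) {
  unsigned N = 0;
  for (; V; V &= V - 1)
    ++N;
  return N;
}
constexpr unsigned long long liveBits(unsigned NumElts) {
  return NumElts >= 64 ? ~0ULL : (1ULL << NumElts) - 1;
}
constexpr bool maskReduceAnd(unsigned long long M, unsigned NumElts) {
  return popcount64(~M & liveBits(NumElts)) == 0;
}
constexpr bool maskReduceOr(unsigned long long M, unsigned NumElts) {
  return popcount64(M & liveBits(NumElts)) != 0;
}
constexpr bool maskReduceXor(unsigned long long M, unsigned NumElts) {
  return (popcount64(M & liveBits(NumElts)) & 1) != 0;
}
static_assert(maskReduceAnd(0b1111, 4) && !maskReduceAnd(0b0111, 4));
static_assert(maskReduceOr(0b0100, 4) && !maskReduceOr(0b0000, 4));
static_assert(!maskReduceXor(0b0110, 4) && maskReduceXor(0b0111, 4));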
9462
9463static bool isNonZeroAVL(SDValue AVL) {
9464 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9465 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9466 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9467 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9468}
9469
9470/// Helper to lower a reduction sequence of the form:
9471/// scalar = reduce_op vec, scalar_start
9472static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9473 SDValue StartValue, SDValue Vec, SDValue Mask,
9474 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9475 const RISCVSubtarget &Subtarget) {
9476 const MVT VecVT = Vec.getSimpleValueType();
9477 const MVT M1VT = getLMUL1VT(VecVT);
9478 const MVT XLenVT = Subtarget.getXLenVT();
9479 const bool NonZeroAVL = isNonZeroAVL(VL);
9480
9481 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9482 // or the original VT if fractional.
9483 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9484 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9485 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9486 // be the result of the reduction operation.
9487 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9488 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9489 DAG, Subtarget);
9490 if (M1VT != InnerVT)
9491 InitialValue =
9492 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9493 InitialValue, DAG.getVectorIdxConstant(0, DL));
9494 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9495 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9496 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9497 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9498 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9499 DAG.getVectorIdxConstant(0, DL));
9500}
9501
9502SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9503 SelectionDAG &DAG) const {
9504 SDLoc DL(Op);
9505 SDValue Vec = Op.getOperand(0);
9506 EVT VecEVT = Vec.getValueType();
9507
9508 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9509
9510 // Due to ordering in legalize types we may have a vector type that needs to
9511 // be split. Do that manually so we can get down to a legal type.
9512 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9513 TargetLowering::TypeSplitVector) {
9514 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9515 VecEVT = Lo.getValueType();
9516 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9517 }
9518
9519 // TODO: The type may need to be widened rather than split. Or widened before
9520 // it can be split.
9521 if (!isTypeLegal(VecEVT))
9522 return SDValue();
9523
9524 MVT VecVT = VecEVT.getSimpleVT();
9525 MVT VecEltVT = VecVT.getVectorElementType();
9526 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9527
9528 MVT ContainerVT = VecVT;
9529 if (VecVT.isFixedLengthVector()) {
9530 ContainerVT = getContainerForFixedLengthVector(VecVT);
9531 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9532 }
9533
9534 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9535
9536 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9537 switch (BaseOpc) {
9538 case ISD::AND:
9539 case ISD::OR:
9540 case ISD::UMAX:
9541 case ISD::UMIN:
9542 case ISD::SMAX:
9543 case ISD::SMIN:
9544 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9545 DAG.getVectorIdxConstant(0, DL));
9546 }
9547 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9548 Mask, VL, DL, DAG, Subtarget);
9549}
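//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] When the source
// type needs splitting, the loop above folds the two halves with the base
// binary opcode and then reduces the halved vector, relying on
// reduce(v) == reduce(lo op hi) for the reassociable reductions handled here.
// A model of that halving scheme with integer add (names and the array size
// are arbitrary; N must be a power of two):
constexpr int reduceAddSplit(const int *V, unsigned N) {
  if (N == 1)
    return V[0];
  int Tmp[32] = {}; // combined halves; big enough for N <= 64
  for (unsigned I = 0; I < N / 2; ++I)
    Tmp[I] = V[I] + V[I + N / 2]; // BaseOpc applied to (lo, hi)
  return reduceAddSplit(Tmp, N / 2);
}
constexpr int ReduceDemoData[8] = {1, 2, 3, 4, 5, 6, 7, 8};
static_assert(reduceAddSplit(ReduceDemoData, 8) == 36); // same as 1+2+...+8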
9550
9551// Given a reduction op, this function returns the matching reduction opcode,
9552// the vector SDValue and the scalar SDValue required to lower this to a
9553// RISCVISD node.
9554 static std::tuple<unsigned, SDValue, SDValue>
9555 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9556 const RISCVSubtarget &Subtarget) {
9557 SDLoc DL(Op);
9558 auto Flags = Op->getFlags();
9559 unsigned Opcode = Op.getOpcode();
9560 switch (Opcode) {
9561 default:
9562 llvm_unreachable("Unhandled reduction");
9563 case ISD::VECREDUCE_FADD: {
9564 // Use positive zero if we can. It is cheaper to materialize.
9565 SDValue Zero =
9566 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9567 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9568 }
9569 case ISD::VECREDUCE_SEQ_FADD:
9570 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9571 Op.getOperand(0));
9572 case ISD::VECREDUCE_FMINIMUM:
9573 case ISD::VECREDUCE_FMAXIMUM:
9574 case ISD::VECREDUCE_FMIN:
9575 case ISD::VECREDUCE_FMAX: {
9576 SDValue Front =
9577 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9578 DAG.getVectorIdxConstant(0, DL));
9579 unsigned RVVOpc =
9580 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9581 ? RISCVISD::VECREDUCE_FMIN_VL
9582 : RISCVISD::VECREDUCE_FMAX_VL;
9583 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9584 }
9585 }
9586}
9587
9588SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9589 SelectionDAG &DAG) const {
9590 SDLoc DL(Op);
9591 MVT VecEltVT = Op.getSimpleValueType();
9592
9593 unsigned RVVOpcode;
9594 SDValue VectorVal, ScalarVal;
9595 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9596 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9597 MVT VecVT = VectorVal.getSimpleValueType();
9598
9599 MVT ContainerVT = VecVT;
9600 if (VecVT.isFixedLengthVector()) {
9601 ContainerVT = getContainerForFixedLengthVector(VecVT);
9602 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9603 }
9604
9605 MVT ResVT = Op.getSimpleValueType();
9606 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9607 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9608 VL, DL, DAG, Subtarget);
9609 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9610 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9611 return Res;
9612
9613 if (Op->getFlags().hasNoNaNs())
9614 return Res;
9615
9616 // Force output to NaN if any element is Nan.
9617 SDValue IsNan =
9618 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9619 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9620 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9621 MVT XLenVT = Subtarget.getXLenVT();
9622 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9623 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9624 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9625 return DAG.getSelect(
9626 DL, ResVT, NoNaNs, Res,
9627 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
9628}
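//
// [Editor's illustration -- not part of RISCVISelLowering.cpp] vfredmax and
// vfredmin quietly ignore NaN inputs, so for VECREDUCE_FMAXIMUM/FMINIMUM the
// code above additionally counts NaN lanes (x != x under SETNE) and forces
// the final result to NaN when any are present. A scalar model of the
// fmaximum case (signed-zero ordering is ignored in this sketch; the helper
// name is invented):
#include <cmath>
#include <limits>
inline float fmaximumReduceModel(const float *V, unsigned N) {
  bool SawNaN = false;
  float Red = -std::numeric_limits<float>::infinity();
  for (unsigned I = 0; I < N; ++I) {
    SawNaN = SawNaN || std::isnan(V[I]);
    if (!std::isnan(V[I]) && V[I] > Red)
      Red = V[I]; // what the NaN-dropping vfredmax computes
  }
  return SawNaN ? std::numeric_limits<float>::quiet_NaN() : Red;
}
// e.g. {1.0f, NAN, 3.0f} -> NaN, while {1.0f, 2.0f, 3.0f} -> 3.0f.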
9629
9630SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9631 SelectionDAG &DAG) const {
9632 SDLoc DL(Op);
9633 unsigned Opc = Op.getOpcode();
9634 SDValue Start = Op.getOperand(0);
9635 SDValue Vec = Op.getOperand(1);
9636 EVT VecEVT = Vec.getValueType();
9637 MVT XLenVT = Subtarget.getXLenVT();
9638
9639 // TODO: The type may need to be widened rather than split. Or widened before
9640 // it can be split.
9641 if (!isTypeLegal(VecEVT))
9642 return SDValue();
9643
9644 MVT VecVT = VecEVT.getSimpleVT();
9645 unsigned RVVOpcode = getRVVReductionOp(Opc);
9646
9647 if (VecVT.isFixedLengthVector()) {
9648 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9649 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9650 }
9651
9652 SDValue VL = Op.getOperand(3);
9653 SDValue Mask = Op.getOperand(2);
9654 SDValue Res =
9655 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9656 Vec, Mask, VL, DL, DAG, Subtarget);
9657 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9658 Op->getFlags().hasNoNaNs())
9659 return Res;
9660
9661 // Propagate NaNs.
9662 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9663 // Check if any of the elements in Vec is NaN.
9664 SDValue IsNaN = DAG.getNode(
9665 RISCVISD::SETCC_VL, DL, PredVT,
9666 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9667 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9668 // Check if the start value is NaN.
9669 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9670 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
9671 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
9672 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9673 MVT ResVT = Res.getSimpleValueType();
9674 return DAG.getSelect(
9675 DL, ResVT, NoNaNs, Res,
9676 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
9677}
9678
9679SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9680 SelectionDAG &DAG) const {
9681 SDValue Vec = Op.getOperand(0);
9682 SDValue SubVec = Op.getOperand(1);
9683 MVT VecVT = Vec.getSimpleValueType();
9684 MVT SubVecVT = SubVec.getSimpleValueType();
9685
9686 SDLoc DL(Op);
9687 MVT XLenVT = Subtarget.getXLenVT();
9688 unsigned OrigIdx = Op.getConstantOperandVal(2);
9689 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9690
9691 // We don't have the ability to slide mask vectors up indexed by their i1
9692 // elements; the smallest we can do is i8. Often we are able to bitcast to
9693 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9694 // into a scalable one, we might not necessarily have enough scalable
9695 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9696 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9697 (OrigIdx != 0 || !Vec.isUndef())) {
9698 if (VecVT.getVectorMinNumElements() >= 8 &&
9699 SubVecVT.getVectorMinNumElements() >= 8) {
9700 assert(OrigIdx % 8 == 0 && "Invalid index");
9701 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9702 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9703 "Unexpected mask vector lowering");
9704 OrigIdx /= 8;
9705 SubVecVT =
9706 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9707 SubVecVT.isScalableVector());
9708 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9709 VecVT.isScalableVector());
9710 Vec = DAG.getBitcast(VecVT, Vec);
9711 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9712 } else {
9713 // We can't slide this mask vector up indexed by its i1 elements.
9714 // This poses a problem when we wish to insert a scalable vector which
9715 // can't be re-expressed as a larger type. Just choose the slow path and
9716 // extend to a larger type, then truncate back down.
9717 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9718 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9719 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9720 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9721 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9722 Op.getOperand(2));
9723 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9724 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9725 }
9726 }
9727
9728 // If the subvector is a fixed-length type and we don't know VLEN
9729 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9730 // don't know which register of a LMUL group contains the specific subvector
9731 // as we only know the minimum register size. Therefore we must slide the
9732 // vector group up the full amount.
9733 const auto VLen = Subtarget.getRealVLen();
9734 if (SubVecVT.isFixedLengthVector() && !VLen) {
9735 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9736 return Op;
9737 MVT ContainerVT = VecVT;
9738 if (VecVT.isFixedLengthVector()) {
9739 ContainerVT = getContainerForFixedLengthVector(VecVT);
9740 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9741 }
9742
9743 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9744 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9745 DAG.getUNDEF(ContainerVT), SubVec,
9746 DAG.getVectorIdxConstant(0, DL));
9747 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9748 return DAG.getBitcast(Op.getValueType(), SubVec);
9749 }
9750
9751 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9752 DAG.getUNDEF(ContainerVT), SubVec,
9753 DAG.getVectorIdxConstant(0, DL));
9754 SDValue Mask =
9755 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9756 // Set the vector length to only the number of elements we care about. Note
9757 // that for slideup this includes the offset.
9758 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9759 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
9760
9761 // Use tail agnostic policy if we're inserting over Vec's tail.
9762 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9763 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9764 Policy = RISCVII::TAIL_AGNOSTIC;
9765
9766 // If we're inserting into the lowest elements, use a tail undisturbed
9767 // vmv.v.v.
9768 if (OrigIdx == 0) {
9769 SubVec =
9770 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9771 } else {
9772 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9773 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9774 SlideupAmt, Mask, VL, Policy);
9775 }
9776
9777 if (VecVT.isFixedLengthVector())
9778 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9779 return DAG.getBitcast(Op.getValueType(), SubVec);
9780 }
9781
9782 MVT ContainerVecVT = VecVT;
9783 if (VecVT.isFixedLengthVector()) {
9784 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
9785 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
9786 }
9787
9788 MVT ContainerSubVecVT = SubVecVT;
9789 if (SubVecVT.isFixedLengthVector()) {
9790 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9791 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
9792 }
9793
9794 unsigned SubRegIdx;
9795 ElementCount RemIdx;
9796 // insert_subvector scales the index by vscale if the subvector is scalable,
9797 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9798 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
9799 if (SubVecVT.isFixedLengthVector()) {
9800 assert(VLen);
9801 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9802 auto Decompose =
9803 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9804 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9805 SubRegIdx = Decompose.first;
9806 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9807 (OrigIdx % Vscale));
9808 } else {
9809 auto Decompose =
9810 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9811 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
9812 SubRegIdx = Decompose.first;
9813 RemIdx = ElementCount::getScalable(Decompose.second);
9814 }
9815
9816 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9817 assert(isPowerOf2_64(
9818 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
9819 bool ExactlyVecRegSized =
9820 Subtarget.expandVScale(SubVecVT.getSizeInBits())
9821 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
9822
9823 // 1. If the Idx has been completely eliminated and this subvector's size is
9824 // a vector register or a multiple thereof, or the surrounding elements are
9825 // undef, then this is a subvector insert which naturally aligns to a vector
9826 // register. These can easily be handled using subregister manipulation.
9827 // 2. If the subvector isn't an exact multiple of a valid register group size,
9828 // then the insertion must preserve the undisturbed elements of the register.
9829 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9830 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
9831 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
9832 // of that LMUL=1 type back into the larger vector (resolving to another
9833 // subregister operation). See below for how our VSLIDEUP works. We go via a
9834 // LMUL=1 type to avoid allocating a large register group to hold our
9835 // subvector.
9836 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
9837 if (SubVecVT.isFixedLengthVector()) {
9838 // We may get NoSubRegister if inserting at index 0 and the subvec
9839 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
9840 if (SubRegIdx == RISCV::NoSubRegister) {
9841 assert(OrigIdx == 0);
9842 return Op;
9843 }
9844
9845 SDValue Insert =
9846 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
9847 if (VecVT.isFixedLengthVector())
9848 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
9849 return Insert;
9850 }
9851 return Op;
9852 }
9853
9854 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9855 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
9856 // (in our case undisturbed). This means we can set up a subvector insertion
9857 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9858 // size of the subvector.
9859 MVT InterSubVT = ContainerVecVT;
9860 SDValue AlignedExtract = Vec;
9861 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
9862 if (SubVecVT.isFixedLengthVector())
9863 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
9864 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
9865 InterSubVT = getLMUL1VT(ContainerVecVT);
9866 // Extract a subvector equal to the nearest full vector register type. This
9867 // should resolve to a EXTRACT_SUBREG instruction.
9868 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9869 DAG.getVectorIdxConstant(AlignedIdx, DL));
9870 }
9871
9872 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9873 DAG.getUNDEF(InterSubVT), SubVec,
9874 DAG.getVectorIdxConstant(0, DL));
9875
9876 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
9877
9878 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
9879 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
9880
9881 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9882 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9883 if (Subtarget.expandVScale(EndIndex) ==
9884 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
9885 Policy = RISCVII::TAIL_AGNOSTIC;
9886
9887 // If we're inserting into the lowest elements, use a tail undisturbed
9888 // vmv.v.v.
9889 if (RemIdx.isZero()) {
9890 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9891 SubVec, VL);
9892 } else {
9893 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
9894
9895 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9896 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9897
9898 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9899 SlideupAmt, Mask, VL, Policy);
9900 }
9901
9902 // If required, insert this subvector back into the correct vector register.
9903 // This should resolve to an INSERT_SUBREG instruction.
9904 if (ContainerVecVT.bitsGT(InterSubVT))
9905 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
9906 DAG.getVectorIdxConstant(AlignedIdx, DL));
9907
9908 if (VecVT.isFixedLengthVector())
9909 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9910
9911 // We might have bitcast from a mask type: cast back to the original type if
9912 // required.
9913 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9914}
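// Editorial sketch, not part of this file: the element-level effect of the
// vslideup-based insertion above under a tail-undisturbed policy, modelled on
// plain vectors. modelSlideupInsert is an illustrative name only.
#include <cstddef>
#include <vector>

static std::vector<int> modelSlideupInsert(std::vector<int> Vec,
                                           const std::vector<int> &SubVec,
                                           std::size_t Offset) {
  // VL for the slideup covers the offset plus the subvector length; lanes
  // below Offset and lanes at or beyond VL are left undisturbed.
  std::size_t VL = Offset + SubVec.size();
  for (std::size_t I = Offset; I < VL && I < Vec.size(); ++I)
    Vec[I] = SubVec[I - Offset];
  return Vec;
}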
9915
9916SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9917 SelectionDAG &DAG) const {
9918 SDValue Vec = Op.getOperand(0);
9919 MVT SubVecVT = Op.getSimpleValueType();
9920 MVT VecVT = Vec.getSimpleValueType();
9921
9922 SDLoc DL(Op);
9923 MVT XLenVT = Subtarget.getXLenVT();
9924 unsigned OrigIdx = Op.getConstantOperandVal(1);
9925 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9926
9927 // We don't have the ability to slide mask vectors down indexed by their i1
9928 // elements; the smallest we can do is i8. Often we are able to bitcast to
9929 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9930 // from a scalable one, we might not necessarily have enough scalable
9931 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
9932 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9933 if (VecVT.getVectorMinNumElements() >= 8 &&
9934 SubVecVT.getVectorMinNumElements() >= 8) {
9935 assert(OrigIdx % 8 == 0 && "Invalid index");
9936 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9937 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9938 "Unexpected mask vector lowering");
9939 OrigIdx /= 8;
9940 SubVecVT =
9941 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9942 SubVecVT.isScalableVector());
9943 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9944 VecVT.isScalableVector());
9945 Vec = DAG.getBitcast(VecVT, Vec);
9946 } else {
9947 // We can't slide this mask vector down, indexed by its i1 elements.
9948 // This poses a problem when we wish to extract a scalable vector which
9949 // can't be re-expressed as a larger type. Just choose the slow path and
9950 // extend to a larger type, then truncate back down.
9951 // TODO: We could probably improve this when extracting certain fixed-length
9952 // vectors from fixed-length vectors, where we can extract as i8 and shift the
9953 // correct element right to reach the desired subvector.
9954 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9955 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9956 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9957 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9958 Op.getOperand(1));
9959 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9960 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9961 }
9962 }
9963
9964 // With an index of 0 this is a cast-like subvector extract, which can be
9965 // performed with subregister operations.
9966 if (OrigIdx == 0)
9967 return Op;
9968
9969 const auto VLen = Subtarget.getRealVLen();
9970
9971 // If the subvector is a fixed-length type and we don't know VLEN
9972 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9973 // don't know which register of a LMUL group contains the specific subvector
9974 // as we only know the minimum register size. Therefore we must slide the
9975 // vector group down the full amount.
9976 if (SubVecVT.isFixedLengthVector() && !VLen) {
9977 MVT ContainerVT = VecVT;
9978 if (VecVT.isFixedLengthVector()) {
9979 ContainerVT = getContainerForFixedLengthVector(VecVT);
9980 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9981 }
9982
9983 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9984 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9985 if (auto ShrunkVT =
9986 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9987 ContainerVT = *ShrunkVT;
9988 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9989 DAG.getVectorIdxConstant(0, DL));
9990 }
9991
9992 SDValue Mask =
9993 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9994 // Set the vector length to only the number of elements we care about. This
9995 // avoids sliding down elements we're going to discard straight away.
9996 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
9997 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9998 SDValue Slidedown =
9999 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10000 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10001 // Now we can use a cast-like subvector extract to get the result.
10002 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10003 DAG.getVectorIdxConstant(0, DL));
10004 return DAG.getBitcast(Op.getValueType(), Slidedown);
10005 }
10006
10007 if (VecVT.isFixedLengthVector()) {
10008 VecVT = getContainerForFixedLengthVector(VecVT);
10009 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10010 }
10011
10012 MVT ContainerSubVecVT = SubVecVT;
10013 if (SubVecVT.isFixedLengthVector())
10014 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10015
10016 unsigned SubRegIdx;
10017 ElementCount RemIdx;
10018 // extract_subvector scales the index by vscale if the subvector is scalable,
10019 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10020 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10021 if (SubVecVT.isFixedLengthVector()) {
10022 assert(VLen);
10023 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10024 auto Decompose =
10025 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10026 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10027 SubRegIdx = Decompose.first;
10028 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10029 (OrigIdx % Vscale));
10030 } else {
10031 auto Decompose =
10032 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10033 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10034 SubRegIdx = Decompose.first;
10035 RemIdx = ElementCount::getScalable(Decompose.second);
10036 }
10037
10038 // If the Idx has been completely eliminated then this is a subvector extract
10039 // which naturally aligns to a vector register. These can easily be handled
10040 // using subregister manipulation.
10041 if (RemIdx.isZero()) {
10042 if (SubVecVT.isFixedLengthVector()) {
10043 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10044 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10045 }
10046 return Op;
10047 }
10048
10049 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10050 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10051 // divide exactly.
10052 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10053 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10054
10055 // If the vector type is an LMUL-group type, extract a subvector equal to the
10056 // nearest full vector register type.
10057 MVT InterSubVT = VecVT;
10058 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10059 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10060 // we should have successfully decomposed the extract into a subregister.
10061 assert(SubRegIdx != RISCV::NoSubRegister);
10062 InterSubVT = getLMUL1VT(VecVT);
10063 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10064 }
10065
10066 // Slide this vector register down by the desired number of elements in order
10067 // to place the desired subvector starting at element 0.
10068 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10069 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10070 if (SubVecVT.isFixedLengthVector())
10071 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10072 SDValue Slidedown =
10073 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10074 Vec, SlidedownAmt, Mask, VL);
10075
10076 // Now the vector is in the right position, extract our final subvector. This
10077 // should resolve to a COPY.
10078 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10079 DAG.getVectorIdxConstant(0, DL));
10080
10081 // We might have bitcast from a mask type: cast back to the original type if
10082 // required.
10083 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10084}
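// Editorial sketch, not part of this file: the slidedown-based extraction
// above on plain vectors. The source is slid down by the element offset and
// the first Len elements are kept; modelSlidedownExtract is illustrative only.
#include <cstddef>
#include <vector>

static std::vector<int> modelSlidedownExtract(const std::vector<int> &Vec,
                                              std::size_t OrigIdx,
                                              std::size_t Len) {
  std::vector<int> Sub(Len, 0);
  for (std::size_t I = 0; I < Len && OrigIdx + I < Vec.size(); ++I)
    Sub[I] = Vec[OrigIdx + I]; // result lane I comes from source lane OrigIdx+I
  return Sub;
}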
10085
10086// Widen a vector's operands to i8, then truncate its results back to the
10087// original type, typically i1. All operand and result types must be the same.
10088 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10089 SelectionDAG &DAG) {
10090 MVT VT = N.getSimpleValueType();
10091 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10092 SmallVector<SDValue, 4> WideOps;
10093 for (SDValue Op : N->ops()) {
10094 assert(Op.getSimpleValueType() == VT &&
10095 "Operands and result must be same type");
10096 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10097 }
10098
10099 unsigned NumVals = N->getNumValues();
10100
10101 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10102 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10103 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10104 SmallVector<SDValue, 4> TruncVals;
10105 for (unsigned I = 0; I < NumVals; I++) {
10106 TruncVals.push_back(
10107 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10108 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10109 }
10110
10111 if (TruncVals.size() > 1)
10112 return DAG.getMergeValues(TruncVals, DL);
10113 return TruncVals.front();
10114}
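// Editorial sketch, not part of this file: the widen-to-i8 trick above applied
// to a boolean deinterleave, with the result narrowed back by comparing
// against zero (the lane-wise SETNE). Names are illustrative only.
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

static std::pair<std::vector<bool>, std::vector<bool>>
modelWidenedI1Deinterleave(const std::vector<bool> &In) {
  std::vector<uint8_t> Wide(In.begin(), In.end()); // zero-extend i1 -> i8
  std::vector<bool> Even, Odd;
  for (std::size_t I = 0; I < Wide.size(); ++I)
    (I % 2 == 0 ? Even : Odd).push_back(Wide[I] != 0); // truncate via "!= 0"
  return {Even, Odd};
}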
10115
10116SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10117 SelectionDAG &DAG) const {
10118 SDLoc DL(Op);
10119 MVT VecVT = Op.getSimpleValueType();
10120
10121 assert(VecVT.isScalableVector() &&
10122 "vector_interleave on non-scalable vector!");
10123
10124 // 1 bit element vectors need to be widened to e8
10125 if (VecVT.getVectorElementType() == MVT::i1)
10126 return widenVectorOpsToi8(Op, DL, DAG);
10127
10128 // If the VT is LMUL=8, we need to split and reassemble.
10129 if (VecVT.getSizeInBits().getKnownMinValue() ==
10130 (8 * RISCV::RVVBitsPerBlock)) {
10131 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10132 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10133 EVT SplitVT = Op0Lo.getValueType();
10134
10135 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10136 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10137 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10138 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10139
10140 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10141 ResLo.getValue(0), ResHi.getValue(0));
10142 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10143 ResHi.getValue(1));
10144 return DAG.getMergeValues({Even, Odd}, DL);
10145 }
10146
10147 // Concatenate the two vectors as one vector to deinterleave
10148 MVT ConcatVT =
10149 MVT::getVectorVT(VecVT.getVectorElementType(),
10150 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10151 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10152 Op.getOperand(0), Op.getOperand(1));
10153
10154 // We want to operate on all lanes, so get the mask and VL for it
10155 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10156 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10157
10158 // We can deinterleave through vnsrl.wi if the element type is smaller than
10159 // ELEN
10160 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10161 SDValue Even =
10162 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10163 SDValue Odd =
10164 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10165 return DAG.getMergeValues({Even, Odd}, DL);
10166 }
10167
10168 // For the indices, use the same SEW to avoid an extra vsetvli
10169 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10170 // Create a vector of even indices {0, 2, 4, ...}
10171 SDValue EvenIdx =
10172 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10173 // Create a vector of odd indices {1, 3, 5, ... }
10174 SDValue OddIdx =
10175 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10176
10177 // Gather the even and odd elements into two separate vectors
10178 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10179 Concat, EvenIdx, Passthru, Mask, VL);
10180 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10181 Concat, OddIdx, Passthru, Mask, VL);
10182
10183 // Extract the result half of the gather for even and odd
10184 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10185 DAG.getVectorIdxConstant(0, DL));
10186 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10187 DAG.getVectorIdxConstant(0, DL));
10188
10189 return DAG.getMergeValues({Even, Odd}, DL);
10190}
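// Editorial sketch, not part of this file: the vnsrl-based deinterleave used
// above for SEW=8, assuming little-endian packing of each adjacent pair of
// bytes into one 16-bit element. A narrowing shift by 0 keeps the even (low)
// bytes and a shift by 8 keeps the odd (high) bytes.
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

static std::pair<std::vector<uint8_t>, std::vector<uint8_t>>
modelVnsrlDeinterleave(const std::vector<uint8_t> &In) {
  std::vector<uint8_t> Even, Odd;
  for (std::size_t I = 0; I + 1 < In.size(); I += 2) {
    uint16_t Pair = static_cast<uint16_t>(In[I]) |
                    static_cast<uint16_t>(In[I + 1]) << 8; // view pair as e16
    Even.push_back(static_cast<uint8_t>(Pair));            // vnsrl.wi vd, vs, 0
    Odd.push_back(static_cast<uint8_t>(Pair >> 8));        // vnsrl.wi vd, vs, 8
  }
  return {Even, Odd};
}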
10191
10192SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10193 SelectionDAG &DAG) const {
10194 SDLoc DL(Op);
10195 MVT VecVT = Op.getSimpleValueType();
10196
10197 assert(VecVT.isScalableVector() &&
10198 "vector_interleave on non-scalable vector!");
10199
10200 // i1 vectors need to be widened to i8
10201 if (VecVT.getVectorElementType() == MVT::i1)
10202 return widenVectorOpsToi8(Op, DL, DAG);
10203
10204 MVT XLenVT = Subtarget.getXLenVT();
10205 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10206
10207 // If the VT is LMUL=8, we need to split and reassemble.
10208 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10209 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10210 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10211 EVT SplitVT = Op0Lo.getValueType();
10212
10213 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10214 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10215 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10216 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10217
10218 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10219 ResLo.getValue(0), ResLo.getValue(1));
10220 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10221 ResHi.getValue(0), ResHi.getValue(1));
10222 return DAG.getMergeValues({Lo, Hi}, DL);
10223 }
10224
10225 SDValue Interleaved;
10226
10227 // If the element type is smaller than ELEN, then we can interleave with
10228 // vwaddu.vv and vwmaccu.vx
10229 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10230 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10231 DAG, Subtarget);
10232 } else {
10233 // Otherwise, fall back to using vrgatherei16.vv
10234 MVT ConcatVT =
10235 MVT::getVectorVT(VecVT.getVectorElementType(),
10236 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10237 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10238 Op.getOperand(0), Op.getOperand(1));
10239
10240 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10241
10242 // 0 1 2 3 4 5 6 7 ...
10243 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10244
10245 // 1 1 1 1 1 1 1 1 ...
10246 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10247
10248 // 0 1 0 1 0 1 0 1 ...
10249 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10250 OddMask = DAG.getSetCC(
10251 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10252 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10253 ISD::SETNE);
10254
10255 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10256
10257 // Build up the index vector for interleaving the concatenated vector
10258 // 0 0 1 1 2 2 3 3 ...
10259 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10260 // 0 n 1 n+1 2 n+2 3 n+3 ...
10261 Idx =
10262 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10263
10264 // Then perform the interleave
10265 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10266 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10267 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10268 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10269 }
10270
10271 // Extract the two halves from the interleaved result
10272 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10273 DAG.getVectorIdxConstant(0, DL));
10274 SDValue Hi = DAG.getNode(
10275 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10276 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10277
10278 return DAG.getMergeValues({Lo, Hi}, DL);
10279}
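// Editorial sketch, not part of this file: the index pattern built above for
// the vrgatherei16 fallback, assuming both sources hold VLMAX elements. Lane i
// gathers element (i >> 1) of the first source when i is even, and element
// (i >> 1) of the second source (offset by VLMAX in the concatenation) when i
// is odd. modelInterleaveGather is an illustrative name only.
#include <cstddef>
#include <vector>

static std::vector<int> modelInterleaveGather(const std::vector<int> &V0,
                                              const std::vector<int> &V1) {
  std::size_t VLMax = V0.size();
  std::vector<int> Concat(V0);
  Concat.insert(Concat.end(), V1.begin(), V1.end());
  std::vector<int> Out(2 * VLMax);
  for (std::size_t I = 0; I < Out.size(); ++I) {
    std::size_t Idx = (I >> 1) + ((I & 1) ? VLMax : 0); // 0, n, 1, n+1, ...
    Out[I] = Concat[Idx];
  }
  return Out;
}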
10280
10281// Lower step_vector to the vid instruction. Any non-identity step value must
10282 // be accounted for by manual expansion.
10283SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10284 SelectionDAG &DAG) const {
10285 SDLoc DL(Op);
10286 MVT VT = Op.getSimpleValueType();
10287 assert(VT.isScalableVector() && "Expected scalable vector");
10288 MVT XLenVT = Subtarget.getXLenVT();
10289 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10290 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10291 uint64_t StepValImm = Op.getConstantOperandVal(0);
10292 if (StepValImm != 1) {
10293 if (isPowerOf2_64(StepValImm)) {
10294 SDValue StepVal =
10295 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10296 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10297 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10298 } else {
10299 SDValue StepVal = lowerScalarSplat(
10300 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10301 VL, VT, DL, DAG, Subtarget);
10302 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10303 }
10304 }
10305 return StepVec;
10306}
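// Editorial sketch, not part of this file: step_vector as vid.v followed by a
// shift (power-of-two step) or a multiply, mirroring the two cases above.
// __builtin_ctzll (GCC/Clang) is assumed for the log2 of the step.
#include <cstdint>
#include <vector>

static std::vector<uint64_t> modelStepVector(unsigned NumElts, uint64_t Step) {
  std::vector<uint64_t> Out(NumElts);
  for (unsigned I = 0; I < NumElts; ++I)
    Out[I] = I; // vid.v: 0, 1, 2, ...
  bool PowerOf2 = Step != 0 && (Step & (Step - 1)) == 0;
  for (uint64_t &E : Out)
    E = PowerOf2 ? (E << __builtin_ctzll(Step)) // shift by log2(Step)
                 : (E * Step);                  // general splat multiply
  return Out;
}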
10307
10308// Implement vector_reverse using vrgather.vv with indices determined by
10309// subtracting the id of each element from (VLMAX-1). This will convert
10310// the indices like so:
10311// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10312// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10313SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10314 SelectionDAG &DAG) const {
10315 SDLoc DL(Op);
10316 MVT VecVT = Op.getSimpleValueType();
10317 if (VecVT.getVectorElementType() == MVT::i1) {
10318 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10319 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10320 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10321 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10322 }
10323
10324 MVT ContainerVT = VecVT;
10325 SDValue Vec = Op.getOperand(0);
10326 if (VecVT.isFixedLengthVector()) {
10327 ContainerVT = getContainerForFixedLengthVector(VecVT);
10328 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10329 }
10330
10331 unsigned EltSize = ContainerVT.getScalarSizeInBits();
10332 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
10333 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10334 unsigned MaxVLMAX =
10335 VecVT.isFixedLengthVector()
10336 ? VecVT.getVectorNumElements()
10337 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10338
10339 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10340 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
10341
10342 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10343 // to use vrgatherei16.vv.
10344 if (MaxVLMAX > 256 && EltSize == 8) {
10345 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10346 // Reverse each half, then reassemble them in reverse order.
10347 // NOTE: It's also possible that after splitting, VLMAX no longer
10348 // requires vrgatherei16.vv.
10349 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10350 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10351 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10352 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10353 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10354 // Reassemble the low and high pieces reversed.
10355 // FIXME: This is a CONCAT_VECTORS.
10356 SDValue Res =
10357 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10358 DAG.getVectorIdxConstant(0, DL));
10359 return DAG.getNode(
10360 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10361 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10362 }
10363
10364 // Just promote the int type to i16 which will double the LMUL.
10365 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
10366 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10367 }
10368
10369 // At LMUL > 1, do the index computation in 16 bits to reduce register
10370 // pressure.
10371 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
10372 IntVT.bitsGT(getLMUL1VT(IntVT))) {
10373 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
10374 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10375 IntVT = IntVT.changeVectorElementType(MVT::i16);
10376 }
10377
10378 MVT XLenVT = Subtarget.getXLenVT();
10379 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10380
10381 // Calculate VLMAX-1 for the desired SEW.
10382 SDValue VLMinus1 = DAG.getNode(
10383 ISD::SUB, DL, XLenVT,
10384 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
10385 DAG.getConstant(1, DL, XLenVT));
10386
10387 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10388 bool IsRV32E64 =
10389 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10390 SDValue SplatVL;
10391 if (!IsRV32E64)
10392 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10393 else
10394 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10395 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10396
10397 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10398 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10399 DAG.getUNDEF(IntVT), Mask, VL);
10400
10401 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
10402 DAG.getUNDEF(ContainerVT), Mask, VL);
10403 if (VecVT.isFixedLengthVector())
10404 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
10405 return Gather;
10406}
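// Editorial sketch, not part of this file: vector_reverse as a gather whose
// index vector is (VLMAX-1) - vid, matching the computation above.
#include <cstddef>
#include <vector>

static std::vector<int> modelVectorReverse(const std::vector<int> &Vec) {
  std::size_t VLMax = Vec.size();
  std::vector<int> Out(VLMax);
  for (std::size_t I = 0; I < VLMax; ++I)
    Out[I] = Vec[(VLMax - 1) - I]; // vrgather with index (VLMAX-1) - i
  return Out;
}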
10407
10408SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10409 SelectionDAG &DAG) const {
10410 SDLoc DL(Op);
10411 SDValue V1 = Op.getOperand(0);
10412 SDValue V2 = Op.getOperand(1);
10413 MVT XLenVT = Subtarget.getXLenVT();
10414 MVT VecVT = Op.getSimpleValueType();
10415
10416 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10417
10418 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10419 SDValue DownOffset, UpOffset;
10420 if (ImmValue >= 0) {
10421 // The operand is a TargetConstant, we need to rebuild it as a regular
10422 // constant.
10423 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10424 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10425 } else {
10426 // The operand is a TargetConstant, we need to rebuild it as a regular
10427 // constant rather than negating the original operand.
10428 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10429 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10430 }
10431
10432 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10433
10434 SDValue SlideDown =
10435 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10436 DownOffset, TrueMask, UpOffset);
10437 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10438 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10439 RISCVII::TAIL_AGNOSTIC);
10440}
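// Editorial sketch, not part of this file: vector_splice for a non-negative
// immediate, modelled as a slidedown of V1 followed by a slideup of V2 where
// DownOffset + UpOffset == VLMAX, as in the code above. It assumes both
// inputs hold VLMAX elements; modelVectorSplice is an illustrative name only.
#include <cstddef>
#include <vector>

static std::vector<int> modelVectorSplice(const std::vector<int> &V1,
                                          const std::vector<int> &V2,
                                          std::size_t DownOffset) {
  std::size_t VLMax = V1.size();
  std::size_t UpOffset = VLMax - DownOffset;
  std::vector<int> Out(VLMax);
  for (std::size_t I = 0; I < UpOffset; ++I)
    Out[I] = V1[I + DownOffset]; // slidedown: the tail of V1 moves to the front
  for (std::size_t I = UpOffset; I < VLMax; ++I)
    Out[I] = V2[I - UpOffset];   // slideup: the head of V2 fills the rest
  return Out;
}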
10441
10442SDValue
10443RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10444 SelectionDAG &DAG) const {
10445 SDLoc DL(Op);
10446 auto *Load = cast<LoadSDNode>(Op);
10447
10448 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10449 Load->getMemoryVT(),
10450 *Load->getMemOperand()) &&
10451 "Expecting a correctly-aligned load");
10452
10453 MVT VT = Op.getSimpleValueType();
10454 MVT XLenVT = Subtarget.getXLenVT();
10455 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10456
10457 // If we know the exact VLEN and our fixed length vector completely fills
10458 // the container, use a whole register load instead.
10459 const auto [MinVLMAX, MaxVLMAX] =
10460 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10461 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10462 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10463 MachineMemOperand *MMO = Load->getMemOperand();
10464 SDValue NewLoad =
10465 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10466 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10467 MMO->getAAInfo(), MMO->getRanges());
10468 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10469 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10470 }
10471
10472 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10473
10474 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10475 SDValue IntID = DAG.getTargetConstant(
10476 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10477 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10478 if (!IsMaskOp)
10479 Ops.push_back(DAG.getUNDEF(ContainerVT));
10480 Ops.push_back(Load->getBasePtr());
10481 Ops.push_back(VL);
10482 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10483 SDValue NewLoad =
10484 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10485 Load->getMemoryVT(), Load->getMemOperand());
10486
10487 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10488 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10489}
10490
10491SDValue
10492RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10493 SelectionDAG &DAG) const {
10494 SDLoc DL(Op);
10495 auto *Store = cast<StoreSDNode>(Op);
10496
10497 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10498 Store->getMemoryVT(),
10499 *Store->getMemOperand()) &&
10500 "Expecting a correctly-aligned store");
10501
10502 SDValue StoreVal = Store->getValue();
10503 MVT VT = StoreVal.getSimpleValueType();
10504 MVT XLenVT = Subtarget.getXLenVT();
10505
10506 // If the size is less than a byte, we need to pad with zeros to make a byte.
10507 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10508 VT = MVT::v8i1;
10509 StoreVal =
10510 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10511 StoreVal, DAG.getVectorIdxConstant(0, DL));
10512 }
10513
10514 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10515
10516 SDValue NewValue =
10517 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10518
10519 // If we know the exact VLEN and our fixed length vector completely fills
10520 // the container, use a whole register store instead.
10521 const auto [MinVLMAX, MaxVLMAX] =
10522 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10523 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10524 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10525 MachineMemOperand *MMO = Store->getMemOperand();
10526 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10527 MMO->getPointerInfo(), MMO->getBaseAlign(),
10528 MMO->getFlags(), MMO->getAAInfo());
10529 }
10530
10531 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10532
10533 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10534 SDValue IntID = DAG.getTargetConstant(
10535 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10536 return DAG.getMemIntrinsicNode(
10537 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10538 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10539 Store->getMemoryVT(), Store->getMemOperand());
10540}
10541
10542SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10543 SelectionDAG &DAG) const {
10544 SDLoc DL(Op);
10545 MVT VT = Op.getSimpleValueType();
10546
10547 const auto *MemSD = cast<MemSDNode>(Op);
10548 EVT MemVT = MemSD->getMemoryVT();
10549 MachineMemOperand *MMO = MemSD->getMemOperand();
10550 SDValue Chain = MemSD->getChain();
10551 SDValue BasePtr = MemSD->getBasePtr();
10552
10553 SDValue Mask, PassThru, VL;
10554 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10555 Mask = VPLoad->getMask();
10556 PassThru = DAG.getUNDEF(VT);
10557 VL = VPLoad->getVectorLength();
10558 } else {
10559 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10560 Mask = MLoad->getMask();
10561 PassThru = MLoad->getPassThru();
10562 }
10563
10564 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10565
10566 MVT XLenVT = Subtarget.getXLenVT();
10567
10568 MVT ContainerVT = VT;
10569 if (VT.isFixedLengthVector()) {
10570 ContainerVT = getContainerForFixedLengthVector(VT);
10571 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10572 if (!IsUnmasked) {
10573 MVT MaskVT = getMaskTypeFor(ContainerVT);
10574 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10575 }
10576 }
10577
10578 if (!VL)
10579 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10580
10581 unsigned IntID =
10582 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10583 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10584 if (IsUnmasked)
10585 Ops.push_back(DAG.getUNDEF(ContainerVT));
10586 else
10587 Ops.push_back(PassThru);
10588 Ops.push_back(BasePtr);
10589 if (!IsUnmasked)
10590 Ops.push_back(Mask);
10591 Ops.push_back(VL);
10592 if (!IsUnmasked)
10593 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10594
10595 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10596
10597 SDValue Result =
10598 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10599 Chain = Result.getValue(1);
10600
10601 if (VT.isFixedLengthVector())
10602 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10603
10604 return DAG.getMergeValues({Result, Chain}, DL);
10605}
10606
10607SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10608 SelectionDAG &DAG) const {
10609 SDLoc DL(Op);
10610
10611 const auto *MemSD = cast<MemSDNode>(Op);
10612 EVT MemVT = MemSD->getMemoryVT();
10613 MachineMemOperand *MMO = MemSD->getMemOperand();
10614 SDValue Chain = MemSD->getChain();
10615 SDValue BasePtr = MemSD->getBasePtr();
10616 SDValue Val, Mask, VL;
10617
10618 bool IsCompressingStore = false;
10619 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10620 Val = VPStore->getValue();
10621 Mask = VPStore->getMask();
10622 VL = VPStore->getVectorLength();
10623 } else {
10624 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10625 Val = MStore->getValue();
10626 Mask = MStore->getMask();
10627 IsCompressingStore = MStore->isCompressingStore();
10628 }
10629
10630 bool IsUnmasked =
10631 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10632
10633 MVT VT = Val.getSimpleValueType();
10634 MVT XLenVT = Subtarget.getXLenVT();
10635
10636 MVT ContainerVT = VT;
10637 if (VT.isFixedLengthVector()) {
10638 ContainerVT = getContainerForFixedLengthVector(VT);
10639
10640 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10641 if (!IsUnmasked || IsCompressingStore) {
10642 MVT MaskVT = getMaskTypeFor(ContainerVT);
10643 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10644 }
10645 }
10646
10647 if (!VL)
10648 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10649
10650 if (IsCompressingStore) {
10651 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10652 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10653 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10654 VL =
10655 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10656 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10657 }
10658
10659 unsigned IntID =
10660 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10661 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10662 Ops.push_back(Val);
10663 Ops.push_back(BasePtr);
10664 if (!IsUnmasked)
10665 Ops.push_back(Mask);
10666 Ops.push_back(VL);
10667
10668 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10669 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10670}
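// Editorial sketch, not part of this file: the compressing-store path above on
// plain vectors. vcompress packs the active elements to the front and the
// population count of the mask becomes the effective VL of the store.
#include <cstddef>
#include <vector>

static std::vector<int> modelCompressingStore(const std::vector<int> &Val,
                                              const std::vector<bool> &Mask) {
  std::vector<int> Packed;
  for (std::size_t I = 0; I < Val.size() && I < Mask.size(); ++I)
    if (Mask[I])
      Packed.push_back(Val[I]); // vcompress keeps active lanes, in order
  // Packed.size() plays the role of VCPOP_VL: only that many elements are
  // written by the subsequent unit-stride vse.
  return Packed;
}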
10671
10672SDValue
10673RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10674 SelectionDAG &DAG) const {
10675 MVT InVT = Op.getOperand(0).getSimpleValueType();
10676 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10677
10678 MVT VT = Op.getSimpleValueType();
10679
10680 SDValue Op1 =
10681 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10682 SDValue Op2 =
10683 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10684
10685 SDLoc DL(Op);
10686 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10687 DAG, Subtarget);
10688 MVT MaskVT = getMaskTypeFor(ContainerVT);
10689
10690 SDValue Cmp =
10691 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10692 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10693
10694 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10695}
10696
10697SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10698 SelectionDAG &DAG) const {
10699 unsigned Opc = Op.getOpcode();
10700 SDLoc DL(Op);
10701 SDValue Chain = Op.getOperand(0);
10702 SDValue Op1 = Op.getOperand(1);
10703 SDValue Op2 = Op.getOperand(2);
10704 SDValue CC = Op.getOperand(3);
10705 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10706 MVT VT = Op.getSimpleValueType();
10707 MVT InVT = Op1.getSimpleValueType();
10708
10709 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10710 // condition code.
10711 if (Opc == ISD::STRICT_FSETCCS) {
10712 // Expand strict_fsetccs(x, oeq) to
10713 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10714 SDVTList VTList = Op->getVTList();
10715 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10716 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10717 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10718 Op2, OLECCVal);
10719 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10720 Op1, OLECCVal);
10721 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10722 Tmp1.getValue(1), Tmp2.getValue(1));
10723 // Tmp1 and Tmp2 might be the same node.
10724 if (Tmp1 != Tmp2)
10725 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10726 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10727 }
10728
10729 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10730 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10731 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10732 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10733 Op2, OEQCCVal);
10734 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10735 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10736 }
10737 }
10738
10739 MVT ContainerInVT = InVT;
10740 if (InVT.isFixedLengthVector()) {
10741 ContainerInVT = getContainerForFixedLengthVector(InVT);
10742 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10743 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10744 }
10745 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10746
10747 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10748
10749 SDValue Res;
10750 if (Opc == ISD::STRICT_FSETCC &&
10751 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10752 CCVal == ISD::SETOLE)) {
10753 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that is
10754 // only active when both input elements are ordered.
10755 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10756 SDValue OrderMask1 = DAG.getNode(
10757 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10758 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10759 True, VL});
10760 SDValue OrderMask2 = DAG.getNode(
10761 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10762 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10763 True, VL});
10764 Mask =
10765 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10766 // Use Mask as the passthru operand to let the result be 0 if either of the
10767 // inputs is unordered.
10768 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10769 DAG.getVTList(MaskVT, MVT::Other),
10770 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10771 } else {
10772 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10773 : RISCVISD::STRICT_FSETCCS_VL;
10774 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10775 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10776 }
10777
10778 if (VT.isFixedLengthVector()) {
10779 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10780 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10781 }
10782 return Res;
10783}
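// Editorial sketch, not part of this file: the scalar identities behind the
// STRICT_FSETCCS expansion above, ignoring exception bookkeeping. An ordered
// equality is two ordered less-or-equal compares, and UNE is its negation.
static bool modelStrictOEQ(double X, double Y) {
  return (X <= Y) && (Y <= X); // oeq(x, y) == ole(x, y) && ole(y, x)
}

static bool modelStrictUNE(double X, double Y) {
  return !modelStrictOEQ(X, Y); // une(x, y) == !oeq(x, y)
}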
10784
10785// Lower vector ABS to smax(X, sub(0, X)).
10786SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10787 SDLoc DL(Op);
10788 MVT VT = Op.getSimpleValueType();
10789 SDValue X = Op.getOperand(0);
10790
10791 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10792 "Unexpected type for ISD::ABS");
10793
10794 MVT ContainerVT = VT;
10795 if (VT.isFixedLengthVector()) {
10796 ContainerVT = getContainerForFixedLengthVector(VT);
10797 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10798 }
10799
10800 SDValue Mask, VL;
10801 if (Op->getOpcode() == ISD::VP_ABS) {
10802 Mask = Op->getOperand(1);
10803 if (VT.isFixedLengthVector())
10804 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10805 Subtarget);
10806 VL = Op->getOperand(2);
10807 } else
10808 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10809
10810 SDValue SplatZero = DAG.getNode(
10811 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10812 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10813 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10814 DAG.getUNDEF(ContainerVT), Mask, VL);
10815 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10816 DAG.getUNDEF(ContainerVT), Mask, VL);
10817
10818 if (VT.isFixedLengthVector())
10819 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10820 return Max;
10821}
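// Editorial sketch, not part of this file: the smax-based ABS used above,
// written for one lane. The unsigned negation avoids UB on INT32_MIN and
// matches the wrapping behaviour of the vector sub.
#include <algorithm>
#include <cstdint>

static int32_t modelVectorAbsLane(int32_t X) {
  int32_t NegX = static_cast<int32_t>(0u - static_cast<uint32_t>(X));
  return std::max(X, NegX); // smax(x, 0 - x); INT32_MIN stays INT32_MIN
}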
10822
10823SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10824 SDValue Op, SelectionDAG &DAG) const {
10825 SDLoc DL(Op);
10826 MVT VT = Op.getSimpleValueType();
10827 SDValue Mag = Op.getOperand(0);
10828 SDValue Sign = Op.getOperand(1);
10829 assert(Mag.getValueType() == Sign.getValueType() &&
10830 "Can only handle COPYSIGN with matching types.");
10831
10832 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10833 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10834 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10835
10836 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10837
10838 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10839 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10840
10841 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10842}
10843
10844SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10845 SDValue Op, SelectionDAG &DAG) const {
10846 MVT VT = Op.getSimpleValueType();
10847 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10848
10849 MVT I1ContainerVT =
10850 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10851
10852 SDValue CC =
10853 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10854 SDValue Op1 =
10855 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10856 SDValue Op2 =
10857 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10858
10859 SDLoc DL(Op);
10860 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10861
10862 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10863 Op2, DAG.getUNDEF(ContainerVT), VL);
10864
10865 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10866}
10867
10868SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10869 SelectionDAG &DAG) const {
10870 unsigned NewOpc = getRISCVVLOp(Op);
10871 bool HasPassthruOp = hasPassthruOp(NewOpc);
10872 bool HasMask = hasMaskOp(NewOpc);
10873
10874 MVT VT = Op.getSimpleValueType();
10875 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10876
10877 // Create list of operands by converting existing ones to scalable types.
10878 SmallVector<SDValue, 6> Ops;
10879 for (const SDValue &V : Op->op_values()) {
10880 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10881
10882 // Pass through non-vector operands.
10883 if (!V.getValueType().isVector()) {
10884 Ops.push_back(V);
10885 continue;
10886 }
10887
10888 // "cast" fixed length vector to a scalable vector.
10889 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10890 "Only fixed length vectors are supported!");
10891 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10892 }
10893
10894 SDLoc DL(Op);
10895 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10896 if (HasPassthruOp)
10897 Ops.push_back(DAG.getUNDEF(ContainerVT));
10898 if (HasMask)
10899 Ops.push_back(Mask);
10900 Ops.push_back(VL);
10901
10902 // StrictFP operations have two result values. Their lowered result should
10903 // have the same result count.
10904 if (Op->isStrictFPOpcode()) {
10905 SDValue ScalableRes =
10906 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10907 Op->getFlags());
10908 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10909 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10910 }
10911
10912 SDValue ScalableRes =
10913 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10914 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10915}
10916
10917// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10918// * Operands of each node are assumed to be in the same order.
10919// * The EVL operand is promoted from i32 to i64 on RV64.
10920// * Fixed-length vectors are converted to their scalable-vector container
10921// types.
10922SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10923 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10924 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
10925
10926 SDLoc DL(Op);
10927 MVT VT = Op.getSimpleValueType();
10928 SmallVector<SDValue, 16> Ops;
10929
10930 MVT ContainerVT = VT;
10931 if (VT.isFixedLengthVector())
10932 ContainerVT = getContainerForFixedLengthVector(VT);
10933
10934 for (const auto &OpIdx : enumerate(Op->ops())) {
10935 SDValue V = OpIdx.value();
10936 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10937 // Add a dummy passthru value before the mask, or if there isn't a mask,
10938 // before the EVL.
10939 if (HasPassthruOp) {
10940 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10941 if (MaskIdx) {
10942 if (*MaskIdx == OpIdx.index())
10943 Ops.push_back(DAG.getUNDEF(ContainerVT));
10944 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10945 OpIdx.index()) {
10946 if (Op.getOpcode() == ISD::VP_MERGE) {
10947 // For VP_MERGE, copy the false operand instead of an undef value.
10948 Ops.push_back(Ops.back());
10949 } else {
10950 assert(Op.getOpcode() == ISD::VP_SELECT);
10951 // For VP_SELECT, add an undef value.
10952 Ops.push_back(DAG.getUNDEF(ContainerVT));
10953 }
10954 }
10955 }
10956 // Pass through operands which aren't fixed-length vectors.
10957 if (!V.getValueType().isFixedLengthVector()) {
10958 Ops.push_back(V);
10959 continue;
10960 }
10961 // "cast" fixed length vector to a scalable vector.
10962 MVT OpVT = V.getSimpleValueType();
10963 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10964 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10965 "Only fixed length vectors are supported!");
10966 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10967 }
10968
10969 if (!VT.isFixedLengthVector())
10970 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10971
10972 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10973
10974 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10975}
10976
10977SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10978 SelectionDAG &DAG) const {
10979 SDLoc DL(Op);
10980 MVT VT = Op.getSimpleValueType();
10981
10982 SDValue Src = Op.getOperand(0);
10983 // NOTE: Mask is dropped.
10984 SDValue VL = Op.getOperand(2);
10985
10986 MVT ContainerVT = VT;
10987 if (VT.isFixedLengthVector()) {
10988 ContainerVT = getContainerForFixedLengthVector(VT);
10989 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10990 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10991 }
10992
10993 MVT XLenVT = Subtarget.getXLenVT();
10994 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10995 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10996 DAG.getUNDEF(ContainerVT), Zero, VL);
10997
10998 SDValue SplatValue = DAG.getSignedConstant(
10999 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11000 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11001 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11002
11003 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11004 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11005 if (!VT.isFixedLengthVector())
11006 return Result;
11007 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11008}
11009
11010SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11011 SelectionDAG &DAG) const {
11012 SDLoc DL(Op);
11013 MVT VT = Op.getSimpleValueType();
11014
11015 SDValue Op1 = Op.getOperand(0);
11016 SDValue Op2 = Op.getOperand(1);
11017 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11018 // NOTE: Mask is dropped.
11019 SDValue VL = Op.getOperand(4);
11020
11021 MVT ContainerVT = VT;
11022 if (VT.isFixedLengthVector()) {
11023 ContainerVT = getContainerForFixedLengthVector(VT);
11024 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11025 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11026 }
11027
11028 SDValue Result;
11029 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11030
11031 switch (Condition) {
11032 default:
11033 break;
11034 // X != Y --> (X^Y)
11035 case ISD::SETNE:
11036 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11037 break;
11038 // X == Y --> ~(X^Y)
11039 case ISD::SETEQ: {
11040 SDValue Temp =
11041 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11042 Result =
11043 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11044 break;
11045 }
11046 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11047 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11048 case ISD::SETGT:
11049 case ISD::SETULT: {
11050 SDValue Temp =
11051 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11052 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11053 break;
11054 }
11055 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11056 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11057 case ISD::SETLT:
11058 case ISD::SETUGT: {
11059 SDValue Temp =
11060 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11061 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11062 break;
11063 }
11064 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11065 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11066 case ISD::SETGE:
11067 case ISD::SETULE: {
11068 SDValue Temp =
11069 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11070 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
11071 break;
11072 }
11073 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11074 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11075 case ISD::SETLE:
11076 case ISD::SETUGE: {
11077 SDValue Temp =
11078 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11079 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
11080 break;
11081 }
11082 }
11083
11084 if (!VT.isFixedLengthVector())
11085 return Result;
11086 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11087}
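// Editorial sketch, not part of this file: the boolean identities used above
// for mask-vector compares, with each i1 lane modelled as a bool. A set bit
// reads as -1 for the signed predicates and as 1 for the unsigned ones, which
// is why SETGT pairs with SETULT, SETGE with SETULE, and so on.
static bool modelMaskSetGT(bool X, bool Y) { return !X && Y; }  // ~X & Y
static bool modelMaskSetGE(bool X, bool Y) { return !X || Y; }  // ~X | Y
static bool modelMaskSetEQ(bool X, bool Y) { return !(X ^ Y); } // ~(X ^ Y)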
11088
11089// Lower Floating-Point/Integer Type-Convert VP SDNodes
11090SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11091 SelectionDAG &DAG) const {
11092 SDLoc DL(Op);
11093
11094 SDValue Src = Op.getOperand(0);
11095 SDValue Mask = Op.getOperand(1);
11096 SDValue VL = Op.getOperand(2);
11097 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11098
11099 MVT DstVT = Op.getSimpleValueType();
11100 MVT SrcVT = Src.getSimpleValueType();
11101 if (DstVT.isFixedLengthVector()) {
11102 DstVT = getContainerForFixedLengthVector(DstVT);
11103 SrcVT = getContainerForFixedLengthVector(SrcVT);
11104 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11105 MVT MaskVT = getMaskTypeFor(DstVT);
11106 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11107 }
11108
11109 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11110 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11111
11112 SDValue Result;
11113 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11114 if (SrcVT.isInteger()) {
11115 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11116
11117 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11118 ? RISCVISD::VSEXT_VL
11119 : RISCVISD::VZEXT_VL;
11120
11121 // Do we need to do any pre-widening before converting?
11122 if (SrcEltSize == 1) {
11123 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11124 MVT XLenVT = Subtarget.getXLenVT();
11125 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11126 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11127 DAG.getUNDEF(IntVT), Zero, VL);
11128 SDValue One = DAG.getSignedConstant(
11129 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11130 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11131 DAG.getUNDEF(IntVT), One, VL);
11132 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11133 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11134 } else if (DstEltSize > (2 * SrcEltSize)) {
11135 // Widen before converting.
11136 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11137 DstVT.getVectorElementCount());
11138 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11139 }
11140
11141 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11142 } else {
11143 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11144 "Wrong input/output vector types");
11145
11146 // Convert f16 to f32 then convert f32 to i64.
11147 if (DstEltSize > (2 * SrcEltSize)) {
11148 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11149 MVT InterimFVT =
11150 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11151 Src =
11152 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11153 }
11154
11155 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11156 }
11157 } else { // Narrowing + Conversion
11158 if (SrcVT.isInteger()) {
11159 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11160 // First do a narrowing conversion to an FP type half the size, then round
11161 // the FP type to a smaller FP type if needed.
11162
11163 MVT InterimFVT = DstVT;
11164 if (SrcEltSize > (2 * DstEltSize)) {
11165 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11166 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11167 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11168 }
11169
11170 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11171
11172 if (InterimFVT != DstVT) {
11173 Src = Result;
11174 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11175 }
11176 } else {
11177 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11178 "Wrong input/output vector types");
11179 // First do a narrowing conversion to an integer half the size, then
11180 // truncate if needed.
11181
11182 if (DstEltSize == 1) {
11183 // First convert to the same size integer, then convert to mask using
11184 // setcc.
11185 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11186 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11187 DstVT.getVectorElementCount());
11188 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11189
11190 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11191 // otherwise the conversion was undefined.
11192 MVT XLenVT = Subtarget.getXLenVT();
11193 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11194 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11195 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11196 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11197 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11198 DAG.getUNDEF(DstVT), Mask, VL});
11199 } else {
11200 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11201 DstVT.getVectorElementCount());
11202
11203 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11204
11205 while (InterimIVT != DstVT) {
11206 SrcEltSize /= 2;
11207 Src = Result;
11208 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11209 DstVT.getVectorElementCount());
11210 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11211 Src, Mask, VL);
11212 }
11213 }
11214 }
11215 }
11216
11217 MVT VT = Op.getSimpleValueType();
11218 if (!VT.isFixedLengthVector())
11219 return Result;
11220 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11221}
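// Editor's note: for a narrowing conversion the code above converts to a type
// half the source element width and then halves the width one step at a time.
// A minimal standalone sketch (not part of this file, names illustrative) of
// the width sequence for, e.g., vXf64 -> vXi8:

#include <cstdio>

int main() {
  unsigned SrcEltSize = 64, DstEltSize = 8; // e.g. f64 elements down to i8
  unsigned Width = SrcEltSize / 2;          // first a narrowing convert to i32
  std::printf("convert to i%u\n", Width);
  while (Width != DstEltSize) {             // then repeated halving truncates
    Width /= 2;
    std::printf("truncate to i%u\n", Width);
  }
  return 0;
}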
11222
11223SDValue
11224RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11225 SelectionDAG &DAG) const {
11226 SDLoc DL(Op);
11227
11228 SDValue Op1 = Op.getOperand(0);
11229 SDValue Op2 = Op.getOperand(1);
11230 SDValue Offset = Op.getOperand(2);
11231 SDValue Mask = Op.getOperand(3);
11232 SDValue EVL1 = Op.getOperand(4);
11233 SDValue EVL2 = Op.getOperand(5);
11234
11235 const MVT XLenVT = Subtarget.getXLenVT();
11236 MVT VT = Op.getSimpleValueType();
11237 MVT ContainerVT = VT;
11238 if (VT.isFixedLengthVector()) {
11239 ContainerVT = getContainerForFixedLengthVector(VT);
11240 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11241 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11242 MVT MaskVT = getMaskTypeFor(ContainerVT);
11243 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11244 }
11245
11246 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11247 if (IsMaskVector) {
11248 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11249
11250 // Expand input operands
11251 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11252 DAG.getUNDEF(ContainerVT),
11253 DAG.getConstant(1, DL, XLenVT), EVL1);
11254 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11255 DAG.getUNDEF(ContainerVT),
11256 DAG.getConstant(0, DL, XLenVT), EVL1);
11257 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11258 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11259
11260 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11261 DAG.getUNDEF(ContainerVT),
11262 DAG.getConstant(1, DL, XLenVT), EVL2);
11263 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11264 DAG.getUNDEF(ContainerVT),
11265 DAG.getConstant(0, DL, XLenVT), EVL2);
11266 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11267 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11268 }
11269
11270 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11271 SDValue DownOffset, UpOffset;
11272 if (ImmValue >= 0) {
11273 // The operand is a TargetConstant; we need to rebuild it as a regular
11274 // constant.
11275 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11276 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11277 } else {
11278 // The operand is a TargetConstant; we need to rebuild it as a regular
11279 // constant rather than negating the original operand.
11280 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11281 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11282 }
11283
11284 SDValue SlideDown =
11285 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11286 Op1, DownOffset, Mask, UpOffset);
11287 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11288 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11289
11290 if (IsMaskVector) {
11291 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11292 Result = DAG.getNode(
11293 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11294 {Result, DAG.getConstant(0, DL, ContainerVT),
11295 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11296 Mask, EVL2});
11297 }
11298
11299 if (!VT.isFixedLengthVector())
11300 return Result;
11301 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11302}
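// Editor's note: for a non-negative offset, the slidedown/slideup pair above
// produces EVL2 meaningful elements: the tail of Op1 starting at the offset,
// followed by leading elements of Op2. A minimal standalone sketch (not part
// of this file, names illustrative) of that element flow:

#include <cassert>
#include <vector>

static std::vector<int> spliceRef(const std::vector<int> &V1,
                                  const std::vector<int> &V2, unsigned Ofs,
                                  unsigned EVL1, unsigned EVL2) {
  std::vector<int> Out;
  for (unsigned I = Ofs; I < EVL1 && Out.size() < EVL2; ++I)
    Out.push_back(V1[I]); // slidedown keeps V1[Ofs..EVL1)
  for (unsigned I = 0; Out.size() < EVL2; ++I)
    Out.push_back(V2[I]); // slideup fills the remaining tail from V2
  return Out;
}

int main() {
  std::vector<int> A{1, 2, 3, 4}, B{9, 8, 7, 6};
  assert((spliceRef(A, B, /*Ofs=*/1, /*EVL1=*/4, /*EVL2=*/4) ==
          std::vector<int>{2, 3, 4, 9}));
  return 0;
}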
11303
11304SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
11305 SelectionDAG &DAG) const {
11306 SDLoc DL(Op);
11307 SDValue Val = Op.getOperand(0);
11308 SDValue Mask = Op.getOperand(1);
11309 SDValue VL = Op.getOperand(2);
11310 MVT VT = Op.getSimpleValueType();
11311
11312 MVT ContainerVT = VT;
11313 if (VT.isFixedLengthVector()) {
11314 ContainerVT = getContainerForFixedLengthVector(VT);
11315 MVT MaskVT = getMaskTypeFor(ContainerVT);
11316 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11317 }
11318
11319 SDValue Result =
11320 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
11321
11322 if (!VT.isFixedLengthVector())
11323 return Result;
11324 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11325}
11326
11327SDValue
11328RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11329 SelectionDAG &DAG) const {
11330 SDLoc DL(Op);
11331 MVT VT = Op.getSimpleValueType();
11332 MVT XLenVT = Subtarget.getXLenVT();
11333
11334 SDValue Op1 = Op.getOperand(0);
11335 SDValue Mask = Op.getOperand(1);
11336 SDValue EVL = Op.getOperand(2);
11337
11338 MVT ContainerVT = VT;
11339 if (VT.isFixedLengthVector()) {
11340 ContainerVT = getContainerForFixedLengthVector(VT);
11341 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11342 MVT MaskVT = getMaskTypeFor(ContainerVT);
11343 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11344 }
11345
11346 MVT GatherVT = ContainerVT;
11347 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11348 // Check if we are working with mask vectors
11349 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11350 if (IsMaskVector) {
11351 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11352
11353 // Expand input operand
11354 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11355 DAG.getUNDEF(IndicesVT),
11356 DAG.getConstant(1, DL, XLenVT), EVL);
11357 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11358 DAG.getUNDEF(IndicesVT),
11359 DAG.getConstant(0, DL, XLenVT), EVL);
11360 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11361 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11362 }
11363
11364 unsigned EltSize = GatherVT.getScalarSizeInBits();
11365 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11366 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11367 unsigned MaxVLMAX =
11368 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11369
11370 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11371 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11372 // to use vrgatherei16.vv.
11373 // TODO: It's also possible to use vrgatherei16.vv for other types to
11374 // decrease register width for the index calculation.
11375 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11376 if (MaxVLMAX > 256 && EltSize == 8) {
11377 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11378 // Split the vector in half and reverse each half using a full register
11379 // reverse.
11380 // Swap the halves and concatenate them.
11381 // Slide the concatenated result by (VLMax - VL).
11382 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11383 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11384 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11385
11386 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11387 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11388
11389 // Reassemble the low and high pieces reversed.
11390 // NOTE: this Result is unmasked (because we do not need masks for
11391 // shuffles). If in the future this has to change, we can use a SELECT_VL
11392 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11393 SDValue Result =
11394 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11395
11396 // Slide off any elements from past EVL that were reversed into the low
11397 // elements.
11398 unsigned MinElts = GatherVT.getVectorMinNumElements();
11399 SDValue VLMax =
11400 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11401 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11402
11403 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11404 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11405
11406 if (IsMaskVector) {
11407 // Truncate Result back to a mask vector
11408 Result =
11409 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11410 {Result, DAG.getConstant(0, DL, GatherVT),
11411 DAG.getCondCode(ISD::SETNE),
11412 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11413 }
11414
11415 if (!VT.isFixedLengthVector())
11416 return Result;
11417 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11418 }
11419
11420 // Just promote the int type to i16 which will double the LMUL.
11421 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11422 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11423 }
11424
11425 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11426 SDValue VecLen =
11427 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11428 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11429 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11430 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11431 DAG.getUNDEF(IndicesVT), Mask, EVL);
11432 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11433 DAG.getUNDEF(GatherVT), Mask, EVL);
11434
11435 if (IsMaskVector) {
11436 // Truncate Result back to a mask vector
11437 Result = DAG.getNode(
11438 RISCVISD::SETCC_VL, DL, ContainerVT,
11439 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11440 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11441 }
11442
11443 if (!VT.isFixedLengthVector())
11444 return Result;
11445 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11446}
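// Editor's note: outside of the LMUL=8 split path, the reverse above is a
// gather whose indices are (EVL - 1) - vid. A minimal standalone sketch (not
// part of this file, names illustrative):

#include <cassert>
#include <vector>

int main() {
  std::vector<int> Src{10, 20, 30, 40, 50};
  unsigned EVL = 4; // only the first EVL elements participate
  std::vector<int> Dst(EVL);
  for (unsigned I = 0; I < EVL; ++I) {
    unsigned Idx = (EVL - 1) - I; // VecLenSplat - VID
    Dst[I] = Src[Idx];            // vrgather.vv reads Src[Idx]
  }
  assert((Dst == std::vector<int>{40, 30, 20, 10}));
  return 0;
}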
11447
11448SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11449 SelectionDAG &DAG) const {
11450 MVT VT = Op.getSimpleValueType();
11451 if (VT.getVectorElementType() != MVT::i1)
11452 return lowerVPOp(Op, DAG);
11453
11454 // It is safe to drop the mask parameter as masked-off elements are undef.
11455 SDValue Op1 = Op->getOperand(0);
11456 SDValue Op2 = Op->getOperand(1);
11457 SDValue VL = Op->getOperand(3);
11458
11459 MVT ContainerVT = VT;
11460 const bool IsFixed = VT.isFixedLengthVector();
11461 if (IsFixed) {
11462 ContainerVT = getContainerForFixedLengthVector(VT);
11463 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11464 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11465 }
11466
11467 SDLoc DL(Op);
11468 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11469 if (!IsFixed)
11470 return Val;
11471 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11472}
11473
11474SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11475 SelectionDAG &DAG) const {
11476 SDLoc DL(Op);
11477 MVT XLenVT = Subtarget.getXLenVT();
11478 MVT VT = Op.getSimpleValueType();
11479 MVT ContainerVT = VT;
11480 if (VT.isFixedLengthVector())
11481 ContainerVT = getContainerForFixedLengthVector(VT);
11482
11483 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11484
11485 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11486 // Check if the mask is known to be all ones
11487 SDValue Mask = VPNode->getMask();
11488 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11489
11490 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11491 : Intrinsic::riscv_vlse_mask,
11492 DL, XLenVT);
11493 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11494 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11495 VPNode->getStride()};
11496 if (!IsUnmasked) {
11497 if (VT.isFixedLengthVector()) {
11498 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11499 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11500 }
11501 Ops.push_back(Mask);
11502 }
11503 Ops.push_back(VPNode->getVectorLength());
11504 if (!IsUnmasked) {
11505 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11506 Ops.push_back(Policy);
11507 }
11508
11509 SDValue Result =
11510 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11511 VPNode->getMemoryVT(), VPNode->getMemOperand());
11512 SDValue Chain = Result.getValue(1);
11513
11514 if (VT.isFixedLengthVector())
11515 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11516
11517 return DAG.getMergeValues({Result, Chain}, DL);
11518}
11519
11520SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11521 SelectionDAG &DAG) const {
11522 SDLoc DL(Op);
11523 MVT XLenVT = Subtarget.getXLenVT();
11524
11525 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11526 SDValue StoreVal = VPNode->getValue();
11527 MVT VT = StoreVal.getSimpleValueType();
11528 MVT ContainerVT = VT;
11529 if (VT.isFixedLengthVector()) {
11530 ContainerVT = getContainerForFixedLengthVector(VT);
11531 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11532 }
11533
11534 // Check if the mask is known to be all ones
11535 SDValue Mask = VPNode->getMask();
11536 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11537
11538 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11539 : Intrinsic::riscv_vsse_mask,
11540 DL, XLenVT);
11541 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11542 VPNode->getBasePtr(), VPNode->getStride()};
11543 if (!IsUnmasked) {
11544 if (VT.isFixedLengthVector()) {
11545 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11546 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11547 }
11548 Ops.push_back(Mask);
11549 }
11550 Ops.push_back(VPNode->getVectorLength());
11551
11552 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11553 Ops, VPNode->getMemoryVT(),
11554 VPNode->getMemOperand());
11555}
11556
11557// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11558// matched to a RVV indexed load. The RVV indexed load instructions only
11559// support the "unsigned unscaled" addressing mode; indices are implicitly
11560// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11561// signed or scaled indexing is extended to the XLEN value type and scaled
11562// accordingly.
11563SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11564 SelectionDAG &DAG) const {
11565 SDLoc DL(Op);
11566 MVT VT = Op.getSimpleValueType();
11567
11568 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11569 EVT MemVT = MemSD->getMemoryVT();
11570 MachineMemOperand *MMO = MemSD->getMemOperand();
11571 SDValue Chain = MemSD->getChain();
11572 SDValue BasePtr = MemSD->getBasePtr();
11573
11574 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11575 SDValue Index, Mask, PassThru, VL;
11576
11577 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11578 Index = VPGN->getIndex();
11579 Mask = VPGN->getMask();
11580 PassThru = DAG.getUNDEF(VT);
11581 VL = VPGN->getVectorLength();
11582 // VP doesn't support extending loads.
11583 LoadExtType = ISD::NON_EXTLOAD;
11584 } else {
11585 // Else it must be a MGATHER.
11586 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11587 Index = MGN->getIndex();
11588 Mask = MGN->getMask();
11589 PassThru = MGN->getPassThru();
11590 LoadExtType = MGN->getExtensionType();
11591 }
11592
11593 MVT IndexVT = Index.getSimpleValueType();
11594 MVT XLenVT = Subtarget.getXLenVT();
11595
11596 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11597 "Unexpected VTs!");
11598 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11599 // Targets have to explicitly opt-in for extending vector loads.
11600 assert(LoadExtType == ISD::NON_EXTLOAD &&
11601 "Unexpected extending MGATHER/VP_GATHER");
11602
11603 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11604 // the selection of the masked intrinsics doesn't do this for us.
11605 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11606
11607 MVT ContainerVT = VT;
11608 if (VT.isFixedLengthVector()) {
11609 ContainerVT = getContainerForFixedLengthVector(VT);
11610 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11611 ContainerVT.getVectorElementCount());
11612
11613 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11614
11615 if (!IsUnmasked) {
11616 MVT MaskVT = getMaskTypeFor(ContainerVT);
11617 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11618 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11619 }
11620 }
11621
11622 if (!VL)
11623 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11624
11625 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11626 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11627 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11628 }
11629
11630 unsigned IntID =
11631 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11632 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11633 if (IsUnmasked)
11634 Ops.push_back(DAG.getUNDEF(ContainerVT));
11635 else
11636 Ops.push_back(PassThru);
11637 Ops.push_back(BasePtr);
11638 Ops.push_back(Index);
11639 if (!IsUnmasked)
11640 Ops.push_back(Mask);
11641 Ops.push_back(VL);
11642 if (!IsUnmasked)
11643 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11644
11645 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11646 SDValue Result =
11647 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11648 Chain = Result.getValue(1);
11649
11650 if (VT.isFixedLengthVector())
11651 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11652
11653 return DAG.getMergeValues({Result, Chain}, DL);
11654}
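// Editor's note: "unsigned unscaled" indexing means each index is a raw byte
// offset added to the base pointer, with no implicit scaling by the element
// size. A minimal standalone sketch (not part of this file, names
// illustrative) of the address computation an indexed load performs:

#include <cassert>
#include <cstdint>

int main() {
  alignas(4) uint32_t Data[4] = {11, 22, 33, 44};
  const char *Base = reinterpret_cast<const char *>(Data);
  uint64_t ByteIndex[2] = {0, 8}; // indices already scaled to byte offsets
  uint32_t Gathered[2];
  for (int I = 0; I < 2; ++I)
    Gathered[I] = *reinterpret_cast<const uint32_t *>(Base + ByteIndex[I]);
  assert(Gathered[0] == 11 && Gathered[1] == 33);
  return 0;
}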
11655
11656// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11657// matched to a RVV indexed store. The RVV indexed store instructions only
11658// support the "unsigned unscaled" addressing mode; indices are implicitly
11659// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11660// signed or scaled indexing is extended to the XLEN value type and scaled
11661// accordingly.
11662SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11663 SelectionDAG &DAG) const {
11664 SDLoc DL(Op);
11665 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11666 EVT MemVT = MemSD->getMemoryVT();
11667 MachineMemOperand *MMO = MemSD->getMemOperand();
11668 SDValue Chain = MemSD->getChain();
11669 SDValue BasePtr = MemSD->getBasePtr();
11670
11671 [[maybe_unused]] bool IsTruncatingStore = false;
11672 SDValue Index, Mask, Val, VL;
11673
11674 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11675 Index = VPSN->getIndex();
11676 Mask = VPSN->getMask();
11677 Val = VPSN->getValue();
11678 VL = VPSN->getVectorLength();
11679 // VP doesn't support truncating stores.
11680 IsTruncatingStore = false;
11681 } else {
11682 // Else it must be a MSCATTER.
11683 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11684 Index = MSN->getIndex();
11685 Mask = MSN->getMask();
11686 Val = MSN->getValue();
11687 IsTruncatingStore = MSN->isTruncatingStore();
11688 }
11689
11690 MVT VT = Val.getSimpleValueType();
11691 MVT IndexVT = Index.getSimpleValueType();
11692 MVT XLenVT = Subtarget.getXLenVT();
11693
11694 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11695 "Unexpected VTs!");
11696 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11697 // Targets have to explicitly opt-in for extending vector loads and
11698 // truncating vector stores.
11699 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11700
11701 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11702 // the selection of the masked intrinsics doesn't do this for us.
11703 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11704
11705 MVT ContainerVT = VT;
11706 if (VT.isFixedLengthVector()) {
11707 ContainerVT = getContainerForFixedLengthVector(VT);
11708 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11709 ContainerVT.getVectorElementCount());
11710
11711 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11712 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11713
11714 if (!IsUnmasked) {
11715 MVT MaskVT = getMaskTypeFor(ContainerVT);
11716 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11717 }
11718 }
11719
11720 if (!VL)
11721 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11722
11723 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11724 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11725 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11726 }
11727
11728 unsigned IntID =
11729 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11730 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11731 Ops.push_back(Val);
11732 Ops.push_back(BasePtr);
11733 Ops.push_back(Index);
11734 if (!IsUnmasked)
11735 Ops.push_back(Mask);
11736 Ops.push_back(VL);
11737
11738 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11739 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11740}
11741
11742SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11743 SelectionDAG &DAG) const {
11744 const MVT XLenVT = Subtarget.getXLenVT();
11745 SDLoc DL(Op);
11746 SDValue Chain = Op->getOperand(0);
11747 SDValue SysRegNo = DAG.getTargetConstant(
11748 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11749 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11750 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11751
11752 // Encoding used for rounding mode in RISC-V differs from that used in
11753 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
11754 // a table, which consists of a sequence of 4-bit fields, each representing the
11755 // corresponding FLT_ROUNDS mode.
11756 static const int Table =
11757 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11758 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11759 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11760 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11761 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11762
11763 SDValue Shift =
11764 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11765 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11766 DAG.getConstant(Table, DL, XLenVT), Shift);
11767 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11768 DAG.getConstant(7, DL, XLenVT));
11769
11770 return DAG.getMergeValues({Masked, Chain}, DL);
11771}
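// Editor's note: the table above packs one 4-bit field per RISC-V frm value
// (RNE=0, RTZ=1, RDN=2, RUP=3, RMM=4), each holding the matching
// llvm.get.rounding value (0=toward-zero, 1=nearest-even, 2=upward,
// 3=downward, 4=nearest-away). A minimal standalone sketch (not part of this
// file) of the same shift-and-mask lookup with plain integers:

#include <cassert>

int main() {
  const int Table = (1 << 4 * 0) | (0 << 4 * 1) | (3 << 4 * 2) |
                    (2 << 4 * 3) | (4 << 4 * 4);
  auto Lookup = [&](unsigned FRM) { return (Table >> (4 * FRM)) & 7; };
  assert(Lookup(0) == 1); // RNE -> nearest, ties to even
  assert(Lookup(1) == 0); // RTZ -> toward zero
  assert(Lookup(2) == 3); // RDN -> downward
  assert(Lookup(3) == 2); // RUP -> upward
  assert(Lookup(4) == 4); // RMM -> nearest, ties away
  return 0;
}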
11772
11773SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11774 SelectionDAG &DAG) const {
11775 const MVT XLenVT = Subtarget.getXLenVT();
11776 SDLoc DL(Op);
11777 SDValue Chain = Op->getOperand(0);
11778 SDValue RMValue = Op->getOperand(1);
11779 SDValue SysRegNo = DAG.getTargetConstant(
11780 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11781
11782 // Encoding used for rounding mode in RISC-V differs from that used in
11783 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
11784 // a table, which consists of a sequence of 4-bit fields, each representing the
11785 // corresponding RISC-V mode.
11786 static const unsigned Table =
11787 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11788 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11789 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11790 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11791 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11792
11793 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11794
11795 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11796 DAG.getConstant(2, DL, XLenVT));
11797 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11798 DAG.getConstant(Table, DL, XLenVT), Shift);
11799 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11800 DAG.getConstant(0x7, DL, XLenVT));
11801 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11802 RMValue);
11803}
11804
11805SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11806 SelectionDAG &DAG) const {
11807 MachineFunction &MF = DAG.getMachineFunction();
11808
11809 bool isRISCV64 = Subtarget.is64Bit();
11810 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11811
11812 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11813 return DAG.getFrameIndex(FI, PtrVT);
11814}
11815
11816// Returns the opcode of the target-specific SDNode that implements the 32-bit
11817// form of the given Opcode.
11818static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11819 switch (Opcode) {
11820 default:
11821 llvm_unreachable("Unexpected opcode");
11822 case ISD::SHL:
11823 return RISCVISD::SLLW;
11824 case ISD::SRA:
11825 return RISCVISD::SRAW;
11826 case ISD::SRL:
11827 return RISCVISD::SRLW;
11828 case ISD::SDIV:
11829 return RISCVISD::DIVW;
11830 case ISD::UDIV:
11831 return RISCVISD::DIVUW;
11832 case ISD::UREM:
11833 return RISCVISD::REMUW;
11834 case ISD::ROTL:
11835 return RISCVISD::ROLW;
11836 case ISD::ROTR:
11837 return RISCVISD::RORW;
11838 }
11839}
11840
11841// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11842// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11843// otherwise be promoted to i64, making it difficult to select the
11844 // SLLW/DIVUW/.../*W later on, because the fact that the operation was originally
11845 // of type i8/i16/i32 is lost.
11846 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11847 unsigned ExtOpc = ISD::ANY_EXTEND) {
11848 SDLoc DL(N);
11849 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11850 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11851 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11852 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11853 // ReplaceNodeResults requires we maintain the same type for the return value.
11854 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11855}
11856
11857 // Converts the given 32-bit operation to an i64 operation with sign-extension
11858 // semantics so that the number of sign-extension instructions can be reduced.
11859 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11860 SDLoc DL(N);
11861 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11862 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11863 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11864 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11865 DAG.getValueType(MVT::i32));
11866 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11867}
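// Editor's note: the *W forms compute on the low 32 bits of their operands and
// sign-extend the 32-bit result to 64 bits, which is why any-extended inputs
// are sufficient above. A minimal standalone sketch (not part of this file,
// names illustrative) of ADDW-style semantics:

#include <cassert>
#include <cstdint>

static int64_t AddW(int64_t A, int64_t B) {
  // Only the low 32 bits of A and B matter; the result is sign-extended.
  return static_cast<int32_t>(static_cast<uint32_t>(A) +
                              static_cast<uint32_t>(B));
}

int main() {
  assert(AddW(0x700000001LL, 2) == 3);      // upper input bits are ignored
  assert(AddW(0x7fffffff, 1) == INT32_MIN); // result is sign-extended
  return 0;
}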
11868
11869 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11870 SmallVectorImpl<SDValue> &Results,
11871 SelectionDAG &DAG) const {
11872 SDLoc DL(N);
11873 switch (N->getOpcode()) {
11874 default:
11875 llvm_unreachable("Don't know how to custom type legalize this operation!");
11876 case ISD::STRICT_FP_TO_SINT:
11877 case ISD::STRICT_FP_TO_UINT:
11878 case ISD::FP_TO_SINT:
11879 case ISD::FP_TO_UINT: {
11880 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11881 "Unexpected custom legalisation");
11882 bool IsStrict = N->isStrictFPOpcode();
11883 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11884 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11885 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11886 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11887 TargetLowering::TypeSoftenFloat) {
11888 if (!isTypeLegal(Op0.getValueType()))
11889 return;
11890 if (IsStrict) {
11891 SDValue Chain = N->getOperand(0);
11892 // In the absence of Zfh, promote f16 to f32, then convert.
11893 if (Op0.getValueType() == MVT::f16 &&
11894 !Subtarget.hasStdExtZfhOrZhinx()) {
11895 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11896 {Chain, Op0});
11897 Chain = Op0.getValue(1);
11898 }
11899 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11900 : RISCVISD::STRICT_FCVT_WU_RV64;
11901 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11902 SDValue Res = DAG.getNode(
11903 Opc, DL, VTs, Chain, Op0,
11904 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11905 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11906 Results.push_back(Res.getValue(1));
11907 return;
11908 }
11909 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
11910 // convert.
11911 if ((Op0.getValueType() == MVT::f16 &&
11912 !Subtarget.hasStdExtZfhOrZhinx()) ||
11913 Op0.getValueType() == MVT::bf16)
11914 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11915
11916 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11917 SDValue Res =
11918 DAG.getNode(Opc, DL, MVT::i64, Op0,
11919 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11920 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11921 return;
11922 }
11923 // If the FP type needs to be softened, emit a library call using the 'si'
11924 // version. If we left it to default legalization we'd end up with 'di'. If
11925 // the FP type doesn't need to be softened just let generic type
11926 // legalization promote the result type.
11927 RTLIB::Libcall LC;
11928 if (IsSigned)
11929 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11930 else
11931 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11932 MakeLibCallOptions CallOptions;
11933 EVT OpVT = Op0.getValueType();
11934 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11935 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11936 SDValue Result;
11937 std::tie(Result, Chain) =
11938 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11939 Results.push_back(Result);
11940 if (IsStrict)
11941 Results.push_back(Chain);
11942 break;
11943 }
11944 case ISD::LROUND: {
11945 SDValue Op0 = N->getOperand(0);
11946 EVT Op0VT = Op0.getValueType();
11947 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11948 TargetLowering::TypeSoftenFloat) {
11949 if (!isTypeLegal(Op0VT))
11950 return;
11951
11952 // In the absence of Zfh, promote f16 to f32, then convert.
11953 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11954 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11955
11956 SDValue Res =
11957 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11958 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11959 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11960 return;
11961 }
11962 // If the FP type needs to be softened, emit a library call to lround. We'll
11963 // need to truncate the result. We assume any value that doesn't fit in i32
11964 // is allowed to return an unspecified value.
11965 RTLIB::Libcall LC =
11966 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11967 MakeLibCallOptions CallOptions;
11968 EVT OpVT = Op0.getValueType();
11969 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11970 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11971 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11972 Results.push_back(Result);
11973 break;
11974 }
11975 case ISD::READCYCLECOUNTER:
11976 case ISD::READSTEADYCOUNTER: {
11977 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11978 "has custom type legalization on riscv32");
11979
11980 SDValue LoCounter, HiCounter;
11981 MVT XLenVT = Subtarget.getXLenVT();
11982 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11983 LoCounter = DAG.getTargetConstant(
11984 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11985 HiCounter = DAG.getTargetConstant(
11986 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11987 } else {
11988 LoCounter = DAG.getTargetConstant(
11989 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11990 HiCounter = DAG.getTargetConstant(
11991 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11992 }
11993 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11994 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
11995 N->getOperand(0), LoCounter, HiCounter);
11996
11997 Results.push_back(
11998 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11999 Results.push_back(RCW.getValue(2));
12000 break;
12001 }
12002 case ISD::LOAD: {
12003 if (!ISD::isNON_EXTLoad(N))
12004 return;
12005
12006 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12007 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12008 LoadSDNode *Ld = cast<LoadSDNode>(N);
12009
12010 SDLoc dl(N);
12011 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12012 Ld->getBasePtr(), Ld->getMemoryVT(),
12013 Ld->getMemOperand());
12014 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12015 Results.push_back(Res.getValue(1));
12016 return;
12017 }
12018 case ISD::MUL: {
12019 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12020 unsigned XLen = Subtarget.getXLen();
12021 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
12022 if (Size > XLen) {
12023 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12024 SDValue LHS = N->getOperand(0);
12025 SDValue RHS = N->getOperand(1);
12026 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12027
12028 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12029 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12030 // We need exactly one side to be unsigned.
12031 if (LHSIsU == RHSIsU)
12032 return;
12033
12034 auto MakeMULPair = [&](SDValue S, SDValue U) {
12035 MVT XLenVT = Subtarget.getXLenVT();
12036 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12037 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12038 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12039 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12040 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12041 };
12042
12043 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12044 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12045
12046 // The other operand should be signed, but still prefer MULH when
12047 // possible.
12048 if (RHSIsU && LHSIsS && !RHSIsS)
12049 Results.push_back(MakeMULPair(LHS, RHS));
12050 else if (LHSIsU && RHSIsS && !LHSIsS)
12051 Results.push_back(MakeMULPair(RHS, LHS));
12052
12053 return;
12054 }
12055 [[fallthrough]];
12056 }
12057 case ISD::ADD:
12058 case ISD::SUB:
12059 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12060 "Unexpected custom legalisation");
12061 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12062 break;
12063 case ISD::SHL:
12064 case ISD::SRA:
12065 case ISD::SRL:
12066 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12067 "Unexpected custom legalisation");
12068 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12069 // If we can use a BSET instruction, allow default promotion to apply.
12070 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12071 isOneConstant(N->getOperand(0)))
12072 break;
12073 Results.push_back(customLegalizeToWOp(N, DAG));
12074 break;
12075 }
12076
12077 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12078 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12079 // shift amount.
12080 if (N->getOpcode() == ISD::SHL) {
12081 SDLoc DL(N);
12082 SDValue NewOp0 =
12083 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12084 SDValue NewOp1 =
12085 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12086 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12087 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12088 DAG.getValueType(MVT::i32));
12089 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12090 }
12091
12092 break;
12093 case ISD::ROTL:
12094 case ISD::ROTR:
12095 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12096 "Unexpected custom legalisation");
12097 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12098 Subtarget.hasVendorXTHeadBb()) &&
12099 "Unexpected custom legalization");
12100 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12101 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12102 return;
12103 Results.push_back(customLegalizeToWOp(N, DAG));
12104 break;
12105 case ISD::CTTZ:
12106 case ISD::CTTZ_ZERO_UNDEF:
12107 case ISD::CTLZ:
12108 case ISD::CTLZ_ZERO_UNDEF: {
12109 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12110 "Unexpected custom legalisation");
12111
12112 SDValue NewOp0 =
12113 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12114 bool IsCTZ =
12115 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12116 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12117 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12118 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12119 return;
12120 }
12121 case ISD::SDIV:
12122 case ISD::UDIV:
12123 case ISD::UREM: {
12124 MVT VT = N->getSimpleValueType(0);
12125 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12126 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12127 "Unexpected custom legalisation");
12128 // Don't promote division/remainder by constant since we should expand those
12129 // to a multiply by a magic constant.
12130 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12131 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12132 !isIntDivCheap(N->getValueType(0), Attr))
12133 return;
12134
12135 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12136 // the upper 32 bits. For other types we need to sign or zero extend
12137 // based on the opcode.
12138 unsigned ExtOpc = ISD::ANY_EXTEND;
12139 if (VT != MVT::i32)
12140 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12141 : ISD::ZERO_EXTEND;
12142
12143 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12144 break;
12145 }
12146 case ISD::SADDO: {
12147 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12148 "Unexpected custom legalisation");
12149
12150 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12151 // use the default legalization.
12152 if (!isa<ConstantSDNode>(N->getOperand(1)))
12153 return;
12154
12155 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12156 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12157 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12158 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12159 DAG.getValueType(MVT::i32));
12160
12161 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12162
12163 // For an addition, the result should be less than one of the operands (LHS)
12164 // if and only if the other operand (RHS) is negative; otherwise there was an
12165 // overflow.
12166 // For a subtraction, the result should be less than one of the operands
12167 // (LHS) if and only if the other operand (RHS) is (non-zero) positive;
12168 // otherwise there was an overflow.
12169 EVT OType = N->getValueType(1);
12170 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12171 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12172
12173 SDValue Overflow =
12174 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12175 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12176 Results.push_back(Overflow);
12177 return;
12178 }
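// Editor's note: a minimal standalone check (not part of this file, names
// illustrative) of the overflow test used above: after a wrapping 32-bit add,
// overflow occurred iff "result < LHS" disagrees with "RHS < 0".

#include <cassert>
#include <cstdint>

static bool SAddOverflows(int32_t L, int32_t R) {
  int64_t Wide = static_cast<int64_t>(L) + R; // exact reference result
  int32_t Res = static_cast<int32_t>(static_cast<uint32_t>(L) +
                                     static_cast<uint32_t>(R));
  bool Overflow = (Res < L) != (R < 0);
  assert(Overflow == (Wide != Res)); // agrees with the widened computation
  return Overflow;
}

int main() {
  assert(SAddOverflows(INT32_MAX, 1));
  assert(!SAddOverflows(INT32_MAX, -1));
  assert(SAddOverflows(INT32_MIN, -1));
  return 0;
}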
12179 case ISD::UADDO:
12180 case ISD::USUBO: {
12181 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12182 "Unexpected custom legalisation");
12183 bool IsAdd = N->getOpcode() == ISD::UADDO;
12184 // Create an ADDW or SUBW.
12185 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12186 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12187 SDValue Res =
12188 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12189 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12190 DAG.getValueType(MVT::i32));
12191
12192 SDValue Overflow;
12193 if (IsAdd && isOneConstant(RHS)) {
12194 // Special case uaddo X, 1 overflowed if the addition result is 0.
12195 // The general case (X + C) < C is not necessarily beneficial. Although we
12196 // reduce the live range of X, we may introduce the materialization of
12197 // constant C, especially when the setcc result is used by a branch. We have
12198 // no compare-with-constant-and-branch instructions.
12199 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12200 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12201 } else if (IsAdd && isAllOnesConstant(RHS)) {
12202 // Special case uaddo X, -1 overflowed if X != 0.
12203 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12204 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12205 } else {
12206 // Sign extend the LHS and perform an unsigned compare with the ADDW
12207 // result. Since the inputs are sign extended from i32, this is equivalent
12208 // to comparing the lower 32 bits.
12209 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12210 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12211 IsAdd ? ISD::SETULT : ISD::SETUGT);
12212 }
12213
12214 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12215 Results.push_back(Overflow);
12216 return;
12217 }
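// Editor's note: a minimal standalone check (not part of this file) of the two
// uaddo special cases used above, assuming wrapping 32-bit arithmetic:
// X + 1 overflows iff the result is 0, and X + UINT32_MAX overflows iff X != 0.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0x7fffffffu, 0xffffffffu}) {
    bool OvfPlusOne = static_cast<uint32_t>(X + 1u) == 0u;
    assert(OvfPlusOne == (X == 0xffffffffu));
    bool OvfAllOnes = (X != 0u);
    assert(OvfAllOnes ==
           (static_cast<uint64_t>(X) + 0xffffffffu > 0xffffffffu));
  }
  return 0;
}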
12218 case ISD::UADDSAT:
12219 case ISD::USUBSAT: {
12220 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12221 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
12222 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12223 // promotion for UADDO/USUBO.
12224 Results.push_back(expandAddSubSat(N, DAG));
12225 return;
12226 }
12227 case ISD::SADDSAT:
12228 case ISD::SSUBSAT: {
12229 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12230 "Unexpected custom legalisation");
12231 Results.push_back(expandAddSubSat(N, DAG));
12232 return;
12233 }
12234 case ISD::ABS: {
12235 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12236 "Unexpected custom legalisation");
12237
12238 if (Subtarget.hasStdExtZbb()) {
12239 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12240 // This allows us to remember that the result is sign extended. Expanding
12241 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12242 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12243 N->getOperand(0));
12244 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12245 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12246 return;
12247 }
12248
12249 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12250 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12251
12252 // Freeze the source so we can increase its use count.
12253 Src = DAG.getFreeze(Src);
12254
12255 // Copy sign bit to all bits using the sraiw pattern.
12256 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12257 DAG.getValueType(MVT::i32));
12258 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12259 DAG.getConstant(31, DL, MVT::i64));
12260
12261 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12262 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12263
12264 // NOTE: The result is only required to be anyextended, but sext is
12265 // consistent with type legalization of sub.
12266 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12267 DAG.getValueType(MVT::i32));
12268 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12269 return;
12270 }
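// Editor's note: a minimal standalone sketch (not part of this file, names
// illustrative) of the sraiw/xor/subw expansion above: Y is the sign bit
// copied to every bit, and abs(X) == (X ^ Y) - Y (assuming X != INT32_MIN and
// an arithmetic right shift of negative values).

#include <cassert>
#include <cstdint>

static int32_t AbsViaSignFill(int32_t X) {
  int32_t Y = X >> 31; // all ones if X is negative, all zeros otherwise
  return (X ^ Y) - Y;  // negates X when Y is all ones, identity otherwise
}

int main() {
  assert(AbsViaSignFill(5) == 5);
  assert(AbsViaSignFill(-7) == 7);
  assert(AbsViaSignFill(0) == 0);
  return 0;
}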
12271 case ISD::BITCAST: {
12272 EVT VT = N->getValueType(0);
12273 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12274 SDValue Op0 = N->getOperand(0);
12275 EVT Op0VT = Op0.getValueType();
12276 MVT XLenVT = Subtarget.getXLenVT();
12277 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12278 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12279 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12280 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12281 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12282 Subtarget.hasStdExtZfbfmin()) {
12283 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12284 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12285 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12286 Subtarget.hasStdExtFOrZfinx()) {
12287 SDValue FPConv =
12288 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12289 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12290 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12291 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12292 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12293 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12294 NewReg.getValue(0), NewReg.getValue(1));
12295 Results.push_back(RetReg);
12296 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12297 isTypeLegal(Op0VT)) {
12298 // Custom-legalize bitcasts from fixed-length vector types to illegal
12299 // scalar types in order to improve codegen. Bitcast the vector to a
12300 // one-element vector type whose element type is the same as the result
12301 // type, and extract the first element.
12302 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12303 if (isTypeLegal(BVT)) {
12304 SDValue BVec = DAG.getBitcast(BVT, Op0);
12305 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12306 DAG.getVectorIdxConstant(0, DL)));
12307 }
12308 }
12309 break;
12310 }
12311 case RISCVISD::BREV8:
12312 case RISCVISD::ORC_B: {
12313 MVT VT = N->getSimpleValueType(0);
12314 MVT XLenVT = Subtarget.getXLenVT();
12315 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12316 "Unexpected custom legalisation");
12317 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
12318 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
12319 "Unexpected extension");
12320 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12321 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12322 // ReplaceNodeResults requires we maintain the same type for the return
12323 // value.
12324 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12325 break;
12326 }
12327 case ISD::EXTRACT_VECTOR_ELT: {
12328 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12329 // type is illegal (currently only vXi64 RV32).
12330 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12331 // transferred to the destination register. We issue two of these from the
12332 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12333 // first element.
12334 SDValue Vec = N->getOperand(0);
12335 SDValue Idx = N->getOperand(1);
12336
12337 // The vector type hasn't been legalized yet so we can't issue target
12338 // specific nodes if it needs legalization.
12339 // FIXME: We would manually legalize if it's important.
12340 if (!isTypeLegal(Vec.getValueType()))
12341 return;
12342
12343 MVT VecVT = Vec.getSimpleValueType();
12344
12345 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12346 VecVT.getVectorElementType() == MVT::i64 &&
12347 "Unexpected EXTRACT_VECTOR_ELT legalization");
12348
12349 // If this is a fixed vector, we need to convert it to a scalable vector.
12350 MVT ContainerVT = VecVT;
12351 if (VecVT.isFixedLengthVector()) {
12352 ContainerVT = getContainerForFixedLengthVector(VecVT);
12353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12354 }
12355
12356 MVT XLenVT = Subtarget.getXLenVT();
12357
12358 // Use a VL of 1 to avoid processing more elements than we need.
12359 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12360
12361 // Unless the index is known to be 0, we must slide the vector down to get
12362 // the desired element into index 0.
12363 if (!isNullConstant(Idx)) {
12364 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12365 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12366 }
12367
12368 // Extract the lower XLEN bits of the correct vector element.
12369 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12370
12371 // To extract the upper XLEN bits of the vector element, shift the first
12372 // element right by 32 bits and re-extract the lower XLEN bits.
12373 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12374 DAG.getUNDEF(ContainerVT),
12375 DAG.getConstant(32, DL, XLenVT), VL);
12376 SDValue LShr32 =
12377 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12378 DAG.getUNDEF(ContainerVT), Mask, VL);
12379
12380 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12381
12382 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12383 break;
12384 }
12385 case ISD::INTRINSIC_WO_CHAIN: {
12386 unsigned IntNo = N->getConstantOperandVal(0);
12387 switch (IntNo) {
12388 default:
12390 "Don't know how to custom type legalize this intrinsic!");
12391 case Intrinsic::experimental_get_vector_length: {
12392 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12393 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12394 return;
12395 }
12396 case Intrinsic::experimental_cttz_elts: {
12397 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12398 Results.push_back(
12399 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12400 return;
12401 }
12402 case Intrinsic::riscv_orc_b:
12403 case Intrinsic::riscv_brev8:
12404 case Intrinsic::riscv_sha256sig0:
12405 case Intrinsic::riscv_sha256sig1:
12406 case Intrinsic::riscv_sha256sum0:
12407 case Intrinsic::riscv_sha256sum1:
12408 case Intrinsic::riscv_sm3p0:
12409 case Intrinsic::riscv_sm3p1: {
12410 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12411 return;
12412 unsigned Opc;
12413 switch (IntNo) {
12414 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12415 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12416 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12417 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12418 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12419 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12420 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12421 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12422 }
12423
12424 SDValue NewOp =
12425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12426 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12427 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12428 return;
12429 }
12430 case Intrinsic::riscv_sm4ks:
12431 case Intrinsic::riscv_sm4ed: {
12432 unsigned Opc =
12433 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12434 SDValue NewOp0 =
12435 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12436 SDValue NewOp1 =
12437 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12438 SDValue Res =
12439 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12440 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12441 return;
12442 }
12443 case Intrinsic::riscv_mopr: {
12444 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12445 return;
12446 SDValue NewOp =
12447 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12448 SDValue Res = DAG.getNode(
12449 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12450 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12451 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12452 return;
12453 }
12454 case Intrinsic::riscv_moprr: {
12455 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12456 return;
12457 SDValue NewOp0 =
12458 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12459 SDValue NewOp1 =
12460 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12461 SDValue Res = DAG.getNode(
12462 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12463 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12464 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12465 return;
12466 }
12467 case Intrinsic::riscv_clmul: {
12468 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12469 return;
12470
12471 SDValue NewOp0 =
12472 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12473 SDValue NewOp1 =
12474 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12475 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12476 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12477 return;
12478 }
12479 case Intrinsic::riscv_clmulh:
12480 case Intrinsic::riscv_clmulr: {
12481 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12482 return;
12483
12484 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12485 // to the full 128-bit clmul result of multiplying two xlen values.
12486 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12487 // upper 32 bits.
12488 //
12489 // The alternative is to mask the inputs to 32 bits and use clmul, but
12490 // that requires two shifts to mask each input without zext.w.
12491 // FIXME: If the inputs are known zero extended or could be freely
12492 // zero extended, the mask form would be better.
12493 SDValue NewOp0 =
12494 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12495 SDValue NewOp1 =
12496 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12497 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12498 DAG.getConstant(32, DL, MVT::i64));
12499 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12500 DAG.getConstant(32, DL, MVT::i64));
12501 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12502 : RISCVISD::CLMULR;
12503 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12504 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12505 DAG.getConstant(32, DL, MVT::i64));
12506 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12507 return;
12508 }
12509 case Intrinsic::riscv_vmv_x_s: {
12510 EVT VT = N->getValueType(0);
12511 MVT XLenVT = Subtarget.getXLenVT();
12512 if (VT.bitsLT(XLenVT)) {
12513 // Simple case just extract using vmv.x.s and truncate.
12514 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12515 Subtarget.getXLenVT(), N->getOperand(1));
12516 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12517 return;
12518 }
12519
12520 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12521 "Unexpected custom legalization");
12522
12523 // We need to do the move in two steps.
12524 SDValue Vec = N->getOperand(1);
12525 MVT VecVT = Vec.getSimpleValueType();
12526
12527 // First extract the lower XLEN bits of the element.
12528 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12529
12530 // To extract the upper XLEN bits of the vector element, shift the first
12531 // element right by 32 bits and re-extract the lower XLEN bits.
12532 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12533
12534 SDValue ThirtyTwoV =
12535 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12536 DAG.getConstant(32, DL, XLenVT), VL);
12537 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12538 DAG.getUNDEF(VecVT), Mask, VL);
12539 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12540
12541 Results.push_back(
12542 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12543 break;
12544 }
12545 }
12546 break;
12547 }
12548 case ISD::VECREDUCE_ADD:
12549 case ISD::VECREDUCE_AND:
12550 case ISD::VECREDUCE_OR:
12551 case ISD::VECREDUCE_XOR:
12552 case ISD::VECREDUCE_SMAX:
12553 case ISD::VECREDUCE_UMAX:
12554 case ISD::VECREDUCE_SMIN:
12555 case ISD::VECREDUCE_UMIN:
12556 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12557 Results.push_back(V);
12558 break;
12559 case ISD::VP_REDUCE_ADD:
12560 case ISD::VP_REDUCE_AND:
12561 case ISD::VP_REDUCE_OR:
12562 case ISD::VP_REDUCE_XOR:
12563 case ISD::VP_REDUCE_SMAX:
12564 case ISD::VP_REDUCE_UMAX:
12565 case ISD::VP_REDUCE_SMIN:
12566 case ISD::VP_REDUCE_UMIN:
12567 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12568 Results.push_back(V);
12569 break;
12570 case ISD::GET_ROUNDING: {
12571 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12572 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12573 Results.push_back(Res.getValue(0));
12574 Results.push_back(Res.getValue(1));
12575 break;
12576 }
12577 }
12578}
12579
12580/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12581/// which corresponds to it.
12582static unsigned getVecReduceOpcode(unsigned Opc) {
12583 switch (Opc) {
12584 default:
12585 llvm_unreachable("Unhandled binary to transform reduction");
12586 case ISD::ADD:
12587 return ISD::VECREDUCE_ADD;
12588 case ISD::UMAX:
12589 return ISD::VECREDUCE_UMAX;
12590 case ISD::SMAX:
12591 return ISD::VECREDUCE_SMAX;
12592 case ISD::UMIN:
12593 return ISD::VECREDUCE_UMIN;
12594 case ISD::SMIN:
12595 return ISD::VECREDUCE_SMIN;
12596 case ISD::AND:
12597 return ISD::VECREDUCE_AND;
12598 case ISD::OR:
12599 return ISD::VECREDUCE_OR;
12600 case ISD::XOR:
12601 return ISD::VECREDUCE_XOR;
12602 case ISD::FADD:
12603 // Note: This is the associative form of the generic reduction opcode.
12604 return ISD::VECREDUCE_FADD;
12605 }
12606}
12607
12608/// Perform two related transforms whose purpose is to incrementally recognize
12609/// an explode_vector followed by scalar reduction as a vector reduction node.
12610/// This exists to recover from a deficiency in SLP which can't handle
12611/// forests with multiple roots sharing common nodes. In some cases, one
12612/// of the trees will be vectorized, and the other will remain (unprofitably)
12613/// scalarized.
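// For example, the first transform rewrites
//   (add (extract_vector_elt v4i32 V, 0), (extract_vector_elt v4i32 V, 1))
// as (vecreduce_add (extract_subvector v2i32 V, 0)); the second transform
// then folds a further (add (vecreduce_add ...), (extract_vector_elt V, 2))
// into a reduction over the first three elements, and so on.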
12614static SDValue
12615 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12616 const RISCVSubtarget &Subtarget) {
12617
12618 // This transform needs to run before all integer types have been legalized
12619 // to i64 (so that the vector element type matches the add type), and while
12620 // it's safe to introduce odd sized vector types.
12621 if (DAG.NewNodesMustHaveLegalTypes)
12622 return SDValue();
12623
12624 // Without V, this transform isn't useful. We could form the (illegal)
12625 // operations and let them be scalarized again, but there's really no point.
12626 if (!Subtarget.hasVInstructions())
12627 return SDValue();
12628
12629 const SDLoc DL(N);
12630 const EVT VT = N->getValueType(0);
12631 const unsigned Opc = N->getOpcode();
12632
12633 // For FADD, we only handle the case with reassociation allowed. We
12634 // could handle strict reduction order, but at the moment, there's no
12635 // known reason to, and the complexity isn't worth it.
12636 // TODO: Handle fminnum and fmaxnum here
12637 if (!VT.isInteger() &&
12638 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12639 return SDValue();
12640
12641 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12642 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12643 "Inconsistent mappings");
12644 SDValue LHS = N->getOperand(0);
12645 SDValue RHS = N->getOperand(1);
12646
12647 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12648 return SDValue();
12649
12650 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12651 std::swap(LHS, RHS);
12652
12653 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12654 !isa<ConstantSDNode>(RHS.getOperand(1)))
12655 return SDValue();
12656
12657 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12658 SDValue SrcVec = RHS.getOperand(0);
12659 EVT SrcVecVT = SrcVec.getValueType();
12660 assert(SrcVecVT.getVectorElementType() == VT);
12661 if (SrcVecVT.isScalableVector())
12662 return SDValue();
12663
12664 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12665 return SDValue();
12666
12667 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12668 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12669 // root of our reduction tree. TODO: We could extend this to any two
12670 // adjacent aligned constant indices if desired.
12671 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12672 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12673 uint64_t LHSIdx =
12674 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12675 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12676 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12677 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12678 DAG.getVectorIdxConstant(0, DL));
12679 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12680 }
12681 }
12682
12683 // Match (binop (reduce (extract_subvector V, 0),
12684 // (extract_vector_elt V, sizeof(SubVec))))
12685 // into a reduction of one more element from the original vector V.
12686 if (LHS.getOpcode() != ReduceOpc)
12687 return SDValue();
12688
12689 SDValue ReduceVec = LHS.getOperand(0);
12690 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12691 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12692 isNullConstant(ReduceVec.getOperand(1)) &&
12693 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12694 // For illegal types (e.g. 3xi32), most will be combined again into a
12695 // wider (hopefully legal) type. If this is a terminal state, we are
12696 // relying on type legalization here to produce something reasonable
12697 // and this lowering quality could probably be improved. (TODO)
12698 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12699 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12700 DAG.getVectorIdxConstant(0, DL));
12701 auto Flags = ReduceVec->getFlags();
12702 Flags.intersectWith(N->getFlags());
12703 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12704 }
12705
12706 return SDValue();
12707}
12708
12709
12710// Try to fold (<bop> x, (reduction.<bop> vec, start))
12711 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12712 const RISCVSubtarget &Subtarget) {
12713 auto BinOpToRVVReduce = [](unsigned Opc) {
12714 switch (Opc) {
12715 default:
12716 llvm_unreachable("Unhandled binary to transform reduction");
12717 case ISD::ADD:
12718 return RISCVISD::VECREDUCE_ADD_VL;
12719 case ISD::UMAX:
12720 return RISCVISD::VECREDUCE_UMAX_VL;
12721 case ISD::SMAX:
12722 return RISCVISD::VECREDUCE_SMAX_VL;
12723 case ISD::UMIN:
12724 return RISCVISD::VECREDUCE_UMIN_VL;
12725 case ISD::SMIN:
12726 return RISCVISD::VECREDUCE_SMIN_VL;
12727 case ISD::AND:
12728 return RISCVISD::VECREDUCE_AND_VL;
12729 case ISD::OR:
12730 return RISCVISD::VECREDUCE_OR_VL;
12731 case ISD::XOR:
12732 return RISCVISD::VECREDUCE_XOR_VL;
12733 case ISD::FADD:
12734 return RISCVISD::VECREDUCE_FADD_VL;
12735 case ISD::FMAXNUM:
12736 return RISCVISD::VECREDUCE_FMAX_VL;
12737 case ISD::FMINNUM:
12738 return RISCVISD::VECREDUCE_FMIN_VL;
12739 }
12740 };
12741
12742 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12743 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12744 isNullConstant(V.getOperand(1)) &&
12745 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12746 };
12747
12748 unsigned Opc = N->getOpcode();
12749 unsigned ReduceIdx;
12750 if (IsReduction(N->getOperand(0), Opc))
12751 ReduceIdx = 0;
12752 else if (IsReduction(N->getOperand(1), Opc))
12753 ReduceIdx = 1;
12754 else
12755 return SDValue();
12756
12757 // Skip FADD when reassociation is not allowed; this combine requires it.
12758 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12759 return SDValue();
12760
12761 SDValue Extract = N->getOperand(ReduceIdx);
12762 SDValue Reduce = Extract.getOperand(0);
12763 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12764 return SDValue();
12765
12766 SDValue ScalarV = Reduce.getOperand(2);
12767 EVT ScalarVT = ScalarV.getValueType();
12768 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12769 ScalarV.getOperand(0)->isUndef() &&
12770 isNullConstant(ScalarV.getOperand(2)))
12771 ScalarV = ScalarV.getOperand(1);
12772
12773 // Make sure that ScalarV is a splat with VL=1.
12774 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12775 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12776 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12777 return SDValue();
12778
12779 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12780 return SDValue();
12781
12782 // Check that the scalar of ScalarV is the neutral element.
12783 // TODO: Deal with value other than neutral element.
12784 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12785 0))
12786 return SDValue();
12787
12788 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12789 // FIXME: We might be able to improve this if operand 0 is undef.
12790 if (!isNonZeroAVL(Reduce.getOperand(5)))
12791 return SDValue();
12792
12793 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12794
12795 SDLoc DL(N);
12796 SDValue NewScalarV =
12797 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12798 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12799
12800 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12801 if (ScalarVT != ScalarV.getValueType())
12802 NewScalarV =
12803 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12804 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12805
12806 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12807 NewScalarV, Reduce.getOperand(3),
12808 Reduce.getOperand(4), Reduce.getOperand(5)};
12809 SDValue NewReduce =
12810 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12811 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12812 Extract.getOperand(1));
12813}
12814
12815// Optimize (add (shl x, c0), (shl y, c1)) ->
12816 // (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2, or 3.
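// For example, with c0 = 5 and c1 = 7 (c1-c0 = 2):
//   (add (shl x, 5), (shl y, 7)) -> (shl (sh2add y, x), 5)
// since ((y << 2) + x) << 5 == (y << 7) + (x << 5).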
12817 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12818 const RISCVSubtarget &Subtarget) {
12819 // Perform this optimization only when the Zba extension is available.
12820 if (!Subtarget.hasStdExtZba())
12821 return SDValue();
12822
12823 // Skip for vector types and larger types.
12824 EVT VT = N->getValueType(0);
12825 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12826 return SDValue();
12827
12828 // The two operand nodes must be SHL and have no other use.
12829 SDValue N0 = N->getOperand(0);
12830 SDValue N1 = N->getOperand(1);
12831 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12832 !N0->hasOneUse() || !N1->hasOneUse())
12833 return SDValue();
12834
12835 // Check c0 and c1.
12836 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12837 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12838 if (!N0C || !N1C)
12839 return SDValue();
12840 int64_t C0 = N0C->getSExtValue();
12841 int64_t C1 = N1C->getSExtValue();
12842 if (C0 <= 0 || C1 <= 0)
12843 return SDValue();
12844
12845 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12846 int64_t Bits = std::min(C0, C1);
12847 int64_t Diff = std::abs(C0 - C1);
12848 if (Diff != 1 && Diff != 2 && Diff != 3)
12849 return SDValue();
12850
12851 // Build nodes.
12852 SDLoc DL(N);
12853 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12854 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12855 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
12856 DAG.getConstant(Diff, DL, VT), NS);
12857 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
12858}
12859
12860// Combine a constant select operand into its use:
12861//
12862// (and (select cond, -1, c), x)
12863// -> (select cond, x, (and x, c)) [AllOnes=1]
12864// (or (select cond, 0, c), x)
12865// -> (select cond, x, (or x, c)) [AllOnes=0]
12866// (xor (select cond, 0, c), x)
12867// -> (select cond, x, (xor x, c)) [AllOnes=0]
12868// (add (select cond, 0, c), x)
12869// -> (select cond, x, (add x, c)) [AllOnes=0]
12870// (sub x, (select cond, 0, c))
12871// -> (select cond, x, (sub x, c)) [AllOnes=0]
12872 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12873 SelectionDAG &DAG, bool AllOnes,
12874 const RISCVSubtarget &Subtarget) {
12875 EVT VT = N->getValueType(0);
12876
12877 // Skip vectors.
12878 if (VT.isVector())
12879 return SDValue();
12880
12881 if (!Subtarget.hasConditionalMoveFusion()) {
12882 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12883 if ((!Subtarget.hasStdExtZicond() &&
12884 !Subtarget.hasVendorXVentanaCondOps()) ||
12885 N->getOpcode() != ISD::AND)
12886 return SDValue();
12887
12888 // Maybe harmful when condition code has multiple use.
12889 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12890 return SDValue();
12891
12892 // Maybe harmful when VT is wider than XLen.
12893 if (VT.getSizeInBits() > Subtarget.getXLen())
12894 return SDValue();
12895 }
12896
12897 if ((Slct.getOpcode() != ISD::SELECT &&
12898 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12899 !Slct.hasOneUse())
12900 return SDValue();
12901
12902 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12903 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12904 };
12905
12906 bool SwapSelectOps;
12907 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12908 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12909 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12910 SDValue NonConstantVal;
12911 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12912 SwapSelectOps = false;
12913 NonConstantVal = FalseVal;
12914 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12915 SwapSelectOps = true;
12916 NonConstantVal = TrueVal;
12917 } else
12918 return SDValue();
12919
12920 // Slct is now known to be the desired identity constant when CC is true.
12921 TrueVal = OtherOp;
12922 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12923 // Unless SwapSelectOps says the condition should be false.
12924 if (SwapSelectOps)
12925 std::swap(TrueVal, FalseVal);
12926
12927 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12928 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12929 {Slct.getOperand(0), Slct.getOperand(1),
12930 Slct.getOperand(2), TrueVal, FalseVal});
12931
12932 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12933 {Slct.getOperand(0), TrueVal, FalseVal});
12934}
12935
12936// Attempt combineSelectAndUse on each operand of a commutative operator N.
12937 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12938 bool AllOnes,
12939 const RISCVSubtarget &Subtarget) {
12940 SDValue N0 = N->getOperand(0);
12941 SDValue N1 = N->getOperand(1);
12942 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12943 return Result;
12944 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12945 return Result;
12946 return SDValue();
12947}
12948
12949// Transform (add (mul x, c0), c1) ->
12950// (add (mul (add x, c1/c0), c0), c1%c0).
12951// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12952// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12953// to an infinite loop in DAGCombine if transformed.
12954// Or transform (add (mul x, c0), c1) ->
12955// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12956// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12957// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12958// lead to an infinite loop in DAGCombine if transformed.
12959// Or transform (add (mul x, c0), c1) ->
12960// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12961// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12962// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12963// lead to an infinite loop in DAGCombine if transformed.
12964// Or transform (add (mul x, c0), c1) ->
12965// (mul (add x, c1/c0), c0).
12966// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
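// For illustration, take c0 = 100 and c1 = 10001: c1 is not simm12, but
// c1/c0 = 100 and c1%c0 = 1 are, and c0*(c1/c0) = 10000 is not, so
//   (add (mul x, 100), 10001) -> (add (mul (add x, 100), 100), 1).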
12967 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12968 const RISCVSubtarget &Subtarget) {
12969 // Skip for vector types and larger types.
12970 EVT VT = N->getValueType(0);
12971 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12972 return SDValue();
12973 // The first operand node must be a MUL and have no other use.
12974 SDValue N0 = N->getOperand(0);
12975 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12976 return SDValue();
12977 // Check if c0 and c1 match above conditions.
12978 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12979 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12980 if (!N0C || !N1C)
12981 return SDValue();
12982 // If N0C has multiple uses it's possible one of the cases in
12983 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12984 // in an infinite loop.
12985 if (!N0C->hasOneUse())
12986 return SDValue();
12987 int64_t C0 = N0C->getSExtValue();
12988 int64_t C1 = N1C->getSExtValue();
12989 int64_t CA, CB;
12990 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12991 return SDValue();
12992 // Search for proper CA (non-zero) and CB that both are simm12.
12993 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12994 !isInt<12>(C0 * (C1 / C0))) {
12995 CA = C1 / C0;
12996 CB = C1 % C0;
12997 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12998 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12999 CA = C1 / C0 + 1;
13000 CB = C1 % C0 - C0;
13001 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13002 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13003 CA = C1 / C0 - 1;
13004 CB = C1 % C0 + C0;
13005 } else
13006 return SDValue();
13007 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13008 SDLoc DL(N);
13009 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13010 DAG.getSignedConstant(CA, DL, VT));
13011 SDValue New1 =
13012 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
13013 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
13014}
13015
13016// add (zext, zext) -> zext (add (zext, zext))
13017// sub (zext, zext) -> sext (sub (zext, zext))
13018// mul (zext, zext) -> zext (mul (zext, zext))
13019// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13020// udiv (zext, zext) -> zext (udiv (zext, zext))
13021// srem (zext, zext) -> zext (srem (zext, zext))
13022// urem (zext, zext) -> zext (urem (zext, zext))
13023//
13024 // where the sum of the extend widths match, and the range of the bin op
13025// fits inside the width of the narrower bin op. (For profitability on rvv, we
13026// use a power of two for both inner and outer extend.)
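// For example, with i8 sources and an i32 result:
//   (add (zext v4i8 a to v4i32), (zext v4i8 b to v4i32))
// -> (zext (add (zext v4i8 a to v4i16), (zext v4i8 b to v4i16)) to v4i32)
// because the sum of two i8 values always fits in 16 bits.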
13027 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13028
13029 EVT VT = N->getValueType(0);
13030 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13031 return SDValue();
13032
13033 SDValue N0 = N->getOperand(0);
13034 SDValue N1 = N->getOperand(1);
13035 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13036 return SDValue();
13037 if (!N0.hasOneUse() || !N1.hasOneUse())
13038 return SDValue();
13039
13040 SDValue Src0 = N0.getOperand(0);
13041 SDValue Src1 = N1.getOperand(0);
13042 EVT SrcVT = Src0.getValueType();
13043 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13044 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13045 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13046 return SDValue();
13047
13048 LLVMContext &C = *DAG.getContext();
13049 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13050 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13051
13052 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13053 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13054
13055 // Src0 and Src1 are zero extended, so they're always positive if signed.
13056 //
13057 // sub can produce a negative from two positive operands, so it needs sign
13058 // extended. Other nodes produce a positive from two positive operands, so
13059 // zero extend instead.
13060 unsigned OuterExtend =
13061 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13062
13063 return DAG.getNode(
13064 OuterExtend, SDLoc(N), VT,
13065 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13066}
13067
13068 // Try to turn (add (xor bool, 1), -1) into (neg bool).
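// If bool is known to be 0 or 1, then (xor bool, 1) == 1 - bool, and so
// (add (xor bool, 1), -1) == -bool, i.e. (sub 0, bool).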
13069 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13070 SDValue N0 = N->getOperand(0);
13071 SDValue N1 = N->getOperand(1);
13072 EVT VT = N->getValueType(0);
13073 SDLoc DL(N);
13074
13075 // RHS should be -1.
13076 if (!isAllOnesConstant(N1))
13077 return SDValue();
13078
13079 // Look for (xor X, 1).
13080 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13081 return SDValue();
13082
13083 // First xor input should be 0 or 1.
13084 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13085 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13086 return SDValue();
13087
13088 // Emit a negate of the setcc.
13089 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13090 N0.getOperand(0));
13091}
13092
13093 static SDValue performADDCombine(SDNode *N,
13094 TargetLowering::DAGCombinerInfo &DCI,
13095 const RISCVSubtarget &Subtarget) {
13096 SelectionDAG &DAG = DCI.DAG;
13097 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13098 return V;
13099 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13100 return V;
13101 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
13102 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13103 return V;
13104 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13105 return V;
13106 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13107 return V;
13108 if (SDValue V = combineBinOpOfZExt(N, DAG))
13109 return V;
13110
13111 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13112 // (select lhs, rhs, cc, x, (add x, y))
13113 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13114}
13115
13116 // Try to turn a sub of a boolean RHS and a constant LHS into an addi.
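// For example, (sub 5, (setcc x, y, eq)) -> (add (setcc x, y, ne), 4),
// using setcc(ne) == 1 - setcc(eq).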
13117 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13118 SDValue N0 = N->getOperand(0);
13119 SDValue N1 = N->getOperand(1);
13120 EVT VT = N->getValueType(0);
13121 SDLoc DL(N);
13122
13123 // Require a constant LHS.
13124 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13125 if (!N0C)
13126 return SDValue();
13127
13128 // All our optimizations involve subtracting 1 from the immediate and forming
13129 // an ADDI. Make sure the new immediate is valid for an ADDI.
13130 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13131 if (!ImmValMinus1.isSignedIntN(12))
13132 return SDValue();
13133
13134 SDValue NewLHS;
13135 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13136 // (sub constant, (setcc x, y, eq/neq)) ->
13137 // (add (setcc x, y, neq/eq), constant - 1)
13138 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13139 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13140 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13141 return SDValue();
13142 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13143 NewLHS =
13144 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13145 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13146 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13147 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13148 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13149 NewLHS = N1.getOperand(0);
13150 } else
13151 return SDValue();
13152
13153 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13154 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13155}
13156
13157// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
13158// non-zero. Replace with orc.b.
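// For example, if X = 0x0100 (only bit 8 set), then
//   (sub (shl X, 8), X) = 0x010000 - 0x0100 = 0x00ff00,
// which matches orc.b(X): every byte with any bit set becomes 0xff.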
13159 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
13160 const RISCVSubtarget &Subtarget) {
13161 if (!Subtarget.hasStdExtZbb())
13162 return SDValue();
13163
13164 EVT VT = N->getValueType(0);
13165
13166 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
13167 return SDValue();
13168
13169 SDValue N0 = N->getOperand(0);
13170 SDValue N1 = N->getOperand(1);
13171
13172 if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
13173 return SDValue();
13174
13175 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
13176 if (!ShAmtC || ShAmtC->getZExtValue() != 8)
13177 return SDValue();
13178
13179 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
13180 if (!DAG.MaskedValueIsZero(N1, Mask))
13181 return SDValue();
13182
13183 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
13184}
13185
13186 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13187 const RISCVSubtarget &Subtarget) {
13188 if (SDValue V = combineSubOfBoolean(N, DAG))
13189 return V;
13190
13191 EVT VT = N->getValueType(0);
13192 SDValue N0 = N->getOperand(0);
13193 SDValue N1 = N->getOperand(1);
13194 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13195 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13196 isNullConstant(N1.getOperand(1))) {
13197 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13198 if (CCVal == ISD::SETLT) {
13199 SDLoc DL(N);
13200 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13201 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13202 DAG.getConstant(ShAmt, DL, VT));
13203 }
13204 }
13205
13206 if (SDValue V = combineBinOpOfZExt(N, DAG))
13207 return V;
13208 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
13209 return V;
13210
13211 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13212 // (select lhs, rhs, cc, x, (sub x, y))
13213 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13214}
13215
13216// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13217// Legalizing setcc can introduce xors like this. Doing this transform reduces
13218// the number of xors and may allow the xor to fold into a branch condition.
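// For example, with X and Y known to be 0 or 1:
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
//   (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)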
13219 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13220 SDValue N0 = N->getOperand(0);
13221 SDValue N1 = N->getOperand(1);
13222 bool IsAnd = N->getOpcode() == ISD::AND;
13223
13224 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13225 return SDValue();
13226
13227 if (!N0.hasOneUse() || !N1.hasOneUse())
13228 return SDValue();
13229
13230 SDValue N01 = N0.getOperand(1);
13231 SDValue N11 = N1.getOperand(1);
13232
13233 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13234 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13235 // operation is And, allow one of the Xors to use -1.
13236 if (isOneConstant(N01)) {
13237 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13238 return SDValue();
13239 } else if (isOneConstant(N11)) {
13240 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13241 if (!(IsAnd && isAllOnesConstant(N01)))
13242 return SDValue();
13243 } else
13244 return SDValue();
13245
13246 EVT VT = N->getValueType(0);
13247
13248 SDValue N00 = N0.getOperand(0);
13249 SDValue N10 = N1.getOperand(0);
13250
13251 // The LHS of the xors needs to be 0/1.
13252 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13253 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13254 return SDValue();
13255
13256 // Invert the opcode and insert a new xor.
13257 SDLoc DL(N);
13258 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13259 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13260 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13261}
13262
13263// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
13264// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
13265 // value to an unsigned value. This will be lowered to a vmax and a series of
13266 // vnclipu instructions later. This can be extended to truncated types other
13267 // than i8 by replacing 256 and 255 with the equivalent constants for the
13268 // type.
13269 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
13270 EVT VT = N->getValueType(0);
13271 SDValue N0 = N->getOperand(0);
13272 EVT SrcVT = N0.getValueType();
13273
13274 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13275 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
13276 return SDValue();
13277
13278 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
13279 return SDValue();
13280
13281 SDValue Cond = N0.getOperand(0);
13282 SDValue True = N0.getOperand(1);
13283 SDValue False = N0.getOperand(2);
13284
13285 if (Cond.getOpcode() != ISD::SETCC)
13286 return SDValue();
13287
13288 // FIXME: Support the version of this pattern with the select operands
13289 // swapped.
13290 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13291 if (CCVal != ISD::SETULT)
13292 return SDValue();
13293
13294 SDValue CondLHS = Cond.getOperand(0);
13295 SDValue CondRHS = Cond.getOperand(1);
13296
13297 if (CondLHS != True)
13298 return SDValue();
13299
13300 unsigned ScalarBits = VT.getScalarSizeInBits();
13301
13302 // FIXME: Support other constants.
13303 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
13304 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
13305 return SDValue();
13306
13307 if (False.getOpcode() != ISD::SIGN_EXTEND)
13308 return SDValue();
13309
13310 False = False.getOperand(0);
13311
13312 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
13313 return SDValue();
13314
13315 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
13316 if (!FalseRHSC || !FalseRHSC->isZero())
13317 return SDValue();
13318
13319 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
13320 if (CCVal2 != ISD::SETGT)
13321 return SDValue();
13322
13323 // Emit the signed to unsigned saturation pattern.
13324 SDLoc DL(N);
13325 SDValue Max =
13326 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
13327 SDValue Min =
13328 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
13329 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
13330 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
13331}
13332
13333 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13334 const RISCVSubtarget &Subtarget) {
13335 SDValue N0 = N->getOperand(0);
13336 EVT VT = N->getValueType(0);
13337
13338 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13339 // extending X. This is safe since we only need the LSB after the shift and
13340 // shift amounts larger than 31 would produce poison. If we wait until
13341 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13342 // to use a BEXT instruction.
13343 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13344 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13345 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13346 SDLoc DL(N0);
13347 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13348 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13349 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13350 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13351 }
13352
13353 return combineTruncSelectToSMaxUSat(N, DAG);
13354}
13355
13356 // Combines two comparison operations and a logic operation into one selection
13357 // operation (min, max) and a logic operation. Returns the newly constructed
13358 // node if the conditions for the optimization are satisfied.
13359 static SDValue performANDCombine(SDNode *N,
13360 TargetLowering::DAGCombinerInfo &DCI,
13361 const RISCVSubtarget &Subtarget) {
13362 SelectionDAG &DAG = DCI.DAG;
13363
13364 SDValue N0 = N->getOperand(0);
13365 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13366 // extending X. This is safe since we only need the LSB after the shift and
13367 // shift amounts larger than 31 would produce poison. If we wait until
13368 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13369 // to use a BEXT instruction.
13370 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13371 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13372 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13373 N0.hasOneUse()) {
13374 SDLoc DL(N);
13375 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13376 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13377 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13378 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13379 DAG.getConstant(1, DL, MVT::i64));
13380 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13381 }
13382
13383 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13384 return V;
13385 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13386 return V;
13387
13388 if (DCI.isAfterLegalizeDAG())
13389 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13390 return V;
13391
13392 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13393 // (select lhs, rhs, cc, x, (and x, y))
13394 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13395}
13396
13397// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13398// FIXME: Generalize to other binary operators with same operand.
13399 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13400 SelectionDAG &DAG) {
13401 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13402
13403 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13404 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13405 !N0.hasOneUse() || !N1.hasOneUse())
13406 return SDValue();
13407
13408 // Should have the same condition.
13409 SDValue Cond = N0.getOperand(1);
13410 if (Cond != N1.getOperand(1))
13411 return SDValue();
13412
13413 SDValue TrueV = N0.getOperand(0);
13414 SDValue FalseV = N1.getOperand(0);
13415
13416 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13417 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13418 !isOneConstant(TrueV.getOperand(1)) ||
13419 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13420 return SDValue();
13421
13422 EVT VT = N->getValueType(0);
13423 SDLoc DL(N);
13424
13425 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13426 Cond);
13427 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13428 Cond);
13429 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13430 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13431}
13432
13433 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13434 const RISCVSubtarget &Subtarget) {
13435 SelectionDAG &DAG = DCI.DAG;
13436
13437 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13438 return V;
13439 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13440 return V;
13441
13442 if (DCI.isAfterLegalizeDAG())
13443 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13444 return V;
13445
13446 // Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the select idiom.
13447 // We may be able to pull a common operation out of the true and false value.
13448 SDValue N0 = N->getOperand(0);
13449 SDValue N1 = N->getOperand(1);
13450 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13451 return V;
13452 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13453 return V;
13454
13455 // fold (or (select cond, 0, y), x) ->
13456 // (select cond, x, (or x, y))
13457 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13458}
13459
13460 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13461 const RISCVSubtarget &Subtarget) {
13462 SDValue N0 = N->getOperand(0);
13463 SDValue N1 = N->getOperand(1);
13464
13465 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13466 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13467 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13468 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13469 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13470 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13471 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13472 SDLoc DL(N);
13473 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13474 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13475 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13476 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13477 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13478 }
13479
13480 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13481 // NOTE: Assumes ROL being legal means ROLW is legal.
13482 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13483 if (N0.getOpcode() == RISCVISD::SLLW &&
13484 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13485 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13486 SDLoc DL(N);
13487 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13488 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13489 }
13490
13491 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13492 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13493 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13494 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13495 if (ConstN00 && CC == ISD::SETLT) {
13496 EVT VT = N0.getValueType();
13497 SDLoc DL(N0);
13498 const APInt &Imm = ConstN00->getAPIntValue();
13499 if ((Imm + 1).isSignedIntN(12))
13500 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13501 DAG.getConstant(Imm + 1, DL, VT), CC);
13502 }
13503 }
13504
13505 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13506 return V;
13507 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13508 return V;
13509
13510 // fold (xor (select cond, 0, y), x) ->
13511 // (select cond, x, (xor x, y))
13512 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13513}
13514
13515// Try to expand a scalar multiply to a faster sequence.
13516 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13517 TargetLowering::DAGCombinerInfo &DCI,
13518 const RISCVSubtarget &Subtarget) {
13519
13520 EVT VT = N->getValueType(0);
13521
13522 // LI + MUL is usually smaller than the alternative sequence.
13523 if (DAG.getMachineFunction().getFunction().hasMinSize())
13524 return SDValue();
13525
13526 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13527 return SDValue();
13528
13529 if (VT != Subtarget.getXLenVT())
13530 return SDValue();
13531
13532 const bool HasShlAdd =
13533 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
13534
13535 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13536 if (!CNode)
13537 return SDValue();
13538 uint64_t MulAmt = CNode->getZExtValue();
13539
13540 // WARNING: The code below is knowingly incorrect with regard to undef semantics.
13541 // We're adding additional uses of X here, and in principle, we should be freezing
13542 // X before doing so. However, adding freeze here causes real regressions, and no
13543 // other target properly freezes X in these cases either.
13544 SDValue X = N->getOperand(0);
13545
13546 if (HasShlAdd) {
13547 for (uint64_t Divisor : {3, 5, 9}) {
13548 if (MulAmt % Divisor != 0)
13549 continue;
13550 uint64_t MulAmt2 = MulAmt / Divisor;
13551 // 3/5/9 * 2^N -> shl (shXadd X, X), N
13552 if (isPowerOf2_64(MulAmt2)) {
13553 SDLoc DL(N);
13554 SDValue X = N->getOperand(0);
13555 // Put the shift first if we can fold a zext into the
13556 // shift forming a slli.uw.
13557 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13558 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13559 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13560 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13561 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13562 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
13563 Shl);
13564 }
13565 // Otherwise, put the shl second so that it can fold with the following
13566 // instructions (e.g. sext or add).
13567 SDValue Mul359 =
13568 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13569 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13570 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13571 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13572 }
13573
13574 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13575 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13576 SDLoc DL(N);
13577 SDValue Mul359 =
13578 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13579 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13580 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13581 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13582 Mul359);
13583 }
13584 }
13585
13586 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13587 // shXadd. First check if this is a sum of two powers of 2 because that's
13588 // easy. Then count the trailing zeros up to the lowest set bit.
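// For example, MulAmt = 34 = 32 + 2 gives ShiftAmt = 5 and ScaleShift = 1,
// producing (sh1add X, (slli X, 5)), i.e. 2*X + 32*X.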
13589 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13590 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13591 if (ScaleShift >= 1 && ScaleShift < 4) {
13592 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13593 SDLoc DL(N);
13594 SDValue Shift1 =
13595 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13596 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13597 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13598 }
13599 }
13600
13601 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13602 // This is the two-instruction form; there are also three-instruction
13603 // variants we could implement, e.g.
13604 // (2^(1,2,3) * 3,5,9 + 1) << C2
13605 // 2^(C1>3) * 3,5,9 +/- 1
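// For example, MulAmt = 11 = 2*5 + 1 matches with Divisor = 5 and TZ = 1,
// producing (sh1add (sh2add X, X), X), i.e. 2*(5*X) + X.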
13606 for (uint64_t Divisor : {3, 5, 9}) {
13607 uint64_t C = MulAmt - 1;
13608 if (C <= Divisor)
13609 continue;
13610 unsigned TZ = llvm::countr_zero(C);
13611 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13612 SDLoc DL(N);
13613 SDValue Mul359 =
13614 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13615 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13616 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13617 DAG.getConstant(TZ, DL, VT), X);
13618 }
13619 }
13620
13621 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13622 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13623 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13624 if (ScaleShift >= 1 && ScaleShift < 4) {
13625 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13626 SDLoc DL(N);
13627 SDValue Shift1 =
13628 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13629 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13630 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13631 DAG.getConstant(ScaleShift, DL, VT), X));
13632 }
13633 }
13634
13635 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
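// For example, MulAmt = 29 = 32 - 3 becomes (sub (slli X, 5), (sh1add X, X)).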
13636 for (uint64_t Offset : {3, 5, 9}) {
13637 if (isPowerOf2_64(MulAmt + Offset)) {
13638 SDLoc DL(N);
13639 SDValue Shift1 =
13640 DAG.getNode(ISD::SHL, DL, VT, X,
13641 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13642 SDValue Mul359 =
13643 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13644 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
13645 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13646 }
13647 }
13648 }
13649
13650 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
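// For example, MulAmt = 56 = 64 - 8 becomes (sub (slli X, 6), (slli X, 3)).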
13651 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
13652 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
13653 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
13654 SDLoc DL(N);
13655 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13656 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
13657 SDValue Shift2 =
13658 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13659 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
13660 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
13661 }
13662
13663 return SDValue();
13664}
13665
13666// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
13667// (bitcast (sra (v2Xi16 (bitcast X)), 15))
13668 // Same for other equivalent types with other equivalent constants.
13669 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
13670 EVT VT = N->getValueType(0);
13671 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13672
13673 // Do this for legal vectors unless they are i1 or i8 vectors.
13674 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
13675 return SDValue();
13676
13677 if (N->getOperand(0).getOpcode() != ISD::AND ||
13678 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
13679 return SDValue();
13680
13681 SDValue And = N->getOperand(0);
13682 SDValue Srl = And.getOperand(0);
13683
13684 APInt V1, V2, V3;
13685 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
13686 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
13687 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
13688 return SDValue();
13689
13690 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
13691 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
13692 V3 != (HalfSize - 1))
13693 return SDValue();
13694
13695 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
13696 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
13697 VT.getVectorElementCount() * 2);
13698 SDLoc DL(N);
13699 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
13700 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
13701 DAG.getConstant(HalfSize - 1, DL, HalfVT));
13702 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
13703}
13704
13705 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13706 TargetLowering::DAGCombinerInfo &DCI,
13707 const RISCVSubtarget &Subtarget) {
13708 EVT VT = N->getValueType(0);
13709 if (!VT.isVector())
13710 return expandMul(N, DAG, DCI, Subtarget);
13711
13712 SDLoc DL(N);
13713 SDValue N0 = N->getOperand(0);
13714 SDValue N1 = N->getOperand(1);
13715 SDValue MulOper;
13716 unsigned AddSubOpc;
13717
13718 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13719 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13720 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13721 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13722 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13723 AddSubOpc = V->getOpcode();
13724 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13725 SDValue Opnd = V->getOperand(1);
13726 MulOper = V->getOperand(0);
13727 if (AddSubOpc == ISD::SUB)
13728 std::swap(Opnd, MulOper);
13729 if (isOneOrOneSplat(Opnd))
13730 return true;
13731 }
13732 return false;
13733 };
13734
13735 if (IsAddSubWith1(N0)) {
13736 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13737 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13738 }
13739
13740 if (IsAddSubWith1(N1)) {
13741 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13742 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13743 }
13744
13745 if (SDValue V = combineBinOpOfZExt(N, DAG))
13746 return V;
13747
13748 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
13749 return V;
13750
13751 return SDValue();
13752}
13753
13754/// According to the property that indexed load/store instructions zero-extend
13755 /// their indices, try to narrow the type of the index operand.
13756static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13757 if (isIndexTypeSigned(IndexType))
13758 return false;
13759
13760 if (!N->hasOneUse())
13761 return false;
13762
13763 EVT VT = N.getValueType();
13764 SDLoc DL(N);
13765
13766 // In general, what we're doing here is seeing if we can sink a truncate to
13767 // a smaller element type into the expression tree building our index.
13768 // TODO: We can generalize this and handle a bunch more cases if useful.
13769
13770 // Narrow a buildvector to the narrowest element type. This requires less
13771 // work and less register pressure at high LMUL, and creates smaller constants
13772 // which may be cheaper to materialize.
13773 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13774 KnownBits Known = DAG.computeKnownBits(N);
13775 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13776 LLVMContext &C = *DAG.getContext();
13777 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13778 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13779 N = DAG.getNode(ISD::TRUNCATE, DL,
13780 VT.changeVectorElementType(ResultVT), N);
13781 return true;
13782 }
13783 }
13784
13785 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13786 if (N.getOpcode() != ISD::SHL)
13787 return false;
13788
13789 SDValue N0 = N.getOperand(0);
13790 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13791 N0.getOpcode() != RISCVISD::VZEXT_VL)
13792 return false;
13793 if (!N0->hasOneUse())
13794 return false;
13795
13796 APInt ShAmt;
13797 SDValue N1 = N.getOperand(1);
13798 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13799 return false;
13800
13801 SDValue Src = N0.getOperand(0);
13802 EVT SrcVT = Src.getValueType();
13803 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13804 unsigned ShAmtV = ShAmt.getZExtValue();
13805 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13806 NewElen = std::max(NewElen, 8U);
13807
13808 // Skip if NewElen is not narrower than the original extended type.
13809 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13810 return false;
13811
13812 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13813 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13814
13815 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13816 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13817 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13818 return true;
13819}
13820
13821// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13822// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13823// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13824// can become a sext.w instead of a shift pair.
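// For illustration, with C1 = 0x80000000:
//   (seteq (and X, 0xffffffff), 0x80000000)
// -> (seteq (sext_inreg X, i32), 0xffffffff80000000)
// where the new constant is C1 sign extended from bit 31.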
13825 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13826 const RISCVSubtarget &Subtarget) {
13827 SDValue N0 = N->getOperand(0);
13828 SDValue N1 = N->getOperand(1);
13829 EVT VT = N->getValueType(0);
13830 EVT OpVT = N0.getValueType();
13831
13832 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13833 return SDValue();
13834
13835 // RHS needs to be a constant.
13836 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13837 if (!N1C)
13838 return SDValue();
13839
13840 // LHS needs to be (and X, 0xffffffff).
13841 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13842 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13843 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13844 return SDValue();
13845
13846 // Looking for an equality compare.
13847 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13848 if (!isIntEqualitySetCC(Cond))
13849 return SDValue();
13850
13851 // Don't do this if the sign bit is provably zero, it will be turned back into
13852 // an AND.
13853 APInt SignMask = APInt::getOneBitSet(64, 31);
13854 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13855 return SDValue();
13856
13857 const APInt &C1 = N1C->getAPIntValue();
13858
13859 SDLoc dl(N);
13860 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13861 // to be equal.
13862 if (C1.getActiveBits() > 32)
13863 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13864
13865 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13866 N0.getOperand(0), DAG.getValueType(MVT::i32));
13867 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13868 dl, OpVT), Cond);
13869}
13870
13871static SDValue
13872 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13873 const RISCVSubtarget &Subtarget) {
13874 SDValue Src = N->getOperand(0);
13875 EVT VT = N->getValueType(0);
13876
13877 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13878 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
13879 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13880 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16) &&
13881 Subtarget.hasStdExtZfhmin())
13882 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13883 Src.getOperand(0));
13884
13885 return SDValue();
13886}
13887
13888namespace {
13889// Forward declaration of the structure holding the necessary information to
13890// apply a combine.
13891struct CombineResult;
13892
13893enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13894/// Helper class for folding sign/zero extensions.
13895/// In particular, this class is used for the following combines:
13896/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13897/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13898/// mul | mul_vl -> vwmul(u) | vwmul_su
13899/// shl | shl_vl -> vwsll
13900/// fadd -> vfwadd | vfwadd_w
13901/// fsub -> vfwsub | vfwsub_w
13902/// fmul -> vfwmul
13903/// An object of this class represents an operand of the operation we want to
13904/// combine.
13905/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13906/// NodeExtensionHelper for `a` and one for `b`.
13907///
13908/// This class abstracts away how the extension is materialized and
13909 /// how its number of users affects the combines.
13910///
13911/// In particular:
13912/// - VWADD_W is conceptually == add(op0, sext(op1))
13913/// - VWADDU_W == add(op0, zext(op1))
13914/// - VWSUB_W == sub(op0, sext(op1))
13915/// - VWSUBU_W == sub(op0, zext(op1))
13916/// - VFWADD_W == fadd(op0, fpext(op1))
13917/// - VFWSUB_W == fsub(op0, fpext(op1))
13918/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13919/// zext|sext(smaller_value).
13920struct NodeExtensionHelper {
13921 /// Records if this operand behaves as if it were zero extended.
13922 bool SupportsZExt;
13923 /// Records if this operand behaves as if it were sign extended.
13924 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13925 /// instance, a splat constant (e.g., 3) would support being both sign and
13926 /// zero extended.
13927 bool SupportsSExt;
13928 /// Records if this operand behaves as if it were floating-point extended.
13929 bool SupportsFPExt;
13930 /// This boolean captures whether we care if this operand would still be
13931 /// around after the folding happens.
13932 bool EnforceOneUse;
13933 /// Original value that this NodeExtensionHelper represents.
13934 SDValue OrigOperand;
13935
13936 /// Get the value feeding the extension or the value itself.
13937 /// E.g., for zext(a), this would return a.
13938 SDValue getSource() const {
13939 switch (OrigOperand.getOpcode()) {
13940 case ISD::ZERO_EXTEND:
13941 case ISD::SIGN_EXTEND:
13942 case RISCVISD::VSEXT_VL:
13943 case RISCVISD::VZEXT_VL:
13944 case RISCVISD::FP_EXTEND_VL:
13945 return OrigOperand.getOperand(0);
13946 default:
13947 return OrigOperand;
13948 }
13949 }
13950
13951 /// Check if this instance represents a splat.
13952 bool isSplat() const {
13953 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13954 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13955 }
13956
13957 /// Get the extended opcode.
13958 unsigned getExtOpc(ExtKind SupportsExt) const {
13959 switch (SupportsExt) {
13960 case ExtKind::SExt:
13961 return RISCVISD::VSEXT_VL;
13962 case ExtKind::ZExt:
13963 return RISCVISD::VZEXT_VL;
13964 case ExtKind::FPExt:
13965 return RISCVISD::FP_EXTEND_VL;
13966 }
13967 llvm_unreachable("Unknown ExtKind enum");
13968 }
13969
13970 /// Get or create a value that can feed \p Root with the given extension \p
13971 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
13972 /// operand. \see ::getSource().
13973 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13974 const RISCVSubtarget &Subtarget,
13975 std::optional<ExtKind> SupportsExt) const {
13976 if (!SupportsExt.has_value())
13977 return OrigOperand;
13978
13979 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13980
13981 SDValue Source = getSource();
13982 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13983 if (Source.getValueType() == NarrowVT)
13984 return Source;
13985
13986 unsigned ExtOpc = getExtOpc(*SupportsExt);
13987
13988 // If we need an extension, we should be changing the type.
13989 SDLoc DL(OrigOperand);
13990 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13991 switch (OrigOperand.getOpcode()) {
13992 case ISD::ZERO_EXTEND:
13993 case ISD::SIGN_EXTEND:
13994 case RISCVISD::VSEXT_VL:
13995 case RISCVISD::VZEXT_VL:
13996 case RISCVISD::FP_EXTEND_VL:
13997 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13998 case ISD::SPLAT_VECTOR:
13999 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
14000 case RISCVISD::VMV_V_X_VL:
14001 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
14002 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
14003 case RISCVISD::VFMV_V_F_VL:
14004 Source = Source.getOperand(1);
14005 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
14006 Source = Source.getOperand(0);
14007 assert(Source.getValueType() == NarrowVT.getVectorElementType());
14008 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
14009 DAG.getUNDEF(NarrowVT), Source, VL);
14010 default:
14011 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
14012 // and that operand should already have the right NarrowVT so no
14013 // extension should be required at this point.
14014 llvm_unreachable("Unsupported opcode");
14015 }
14016 }
14017
14018 /// Helper function to get the narrow type for \p Root.
14019 /// The narrow type is the type of \p Root where we divided the size of each
14020 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
14021 /// \pre Both the narrow type and the original type should be legal.
14022 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
14023 MVT VT = Root->getSimpleValueType(0);
14024
14025 // Determine the narrow size.
14026 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14027
14028 MVT EltVT = SupportsExt == ExtKind::FPExt
14029 ? MVT::getFloatingPointVT(NarrowSize)
14030 : MVT::getIntegerVT(NarrowSize);
14031
14032 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
14033 "Trying to extend something we can't represent");
14034 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
14035 return NarrowVT;
14036 }
14037
14038 /// Get the opcode to materialize:
14039 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
14040 static unsigned getSExtOpcode(unsigned Opcode) {
14041 switch (Opcode) {
14042 case ISD::ADD:
14043 case RISCVISD::ADD_VL:
14044 case RISCVISD::VWADD_W_VL:
14045 case RISCVISD::VWADDU_W_VL:
14046 case ISD::OR:
14047 return RISCVISD::VWADD_VL;
14048 case ISD::SUB:
14049 case RISCVISD::SUB_VL:
14050 case RISCVISD::VWSUB_W_VL:
14051 case RISCVISD::VWSUBU_W_VL:
14052 return RISCVISD::VWSUB_VL;
14053 case ISD::MUL:
14054 case RISCVISD::MUL_VL:
14055 return RISCVISD::VWMUL_VL;
14056 default:
14057 llvm_unreachable("Unexpected opcode");
14058 }
14059 }
14060
14061 /// Get the opcode to materialize:
14062 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14063 static unsigned getZExtOpcode(unsigned Opcode) {
14064 switch (Opcode) {
14065 case ISD::ADD:
14066 case RISCVISD::ADD_VL:
14067 case RISCVISD::VWADD_W_VL:
14068 case RISCVISD::VWADDU_W_VL:
14069 case ISD::OR:
14070 return RISCVISD::VWADDU_VL;
14071 case ISD::SUB:
14072 case RISCVISD::SUB_VL:
14073 case RISCVISD::VWSUB_W_VL:
14074 case RISCVISD::VWSUBU_W_VL:
14075 return RISCVISD::VWSUBU_VL;
14076 case ISD::MUL:
14077 case RISCVISD::MUL_VL:
14078 return RISCVISD::VWMULU_VL;
14079 case ISD::SHL:
14080 case RISCVISD::SHL_VL:
14081 return RISCVISD::VWSLL_VL;
14082 default:
14083 llvm_unreachable("Unexpected opcode");
14084 }
14085 }
14086
14087 /// Get the opcode to materialize:
14088 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14089 static unsigned getFPExtOpcode(unsigned Opcode) {
14090 switch (Opcode) {
14091 case RISCVISD::FADD_VL:
14092 case RISCVISD::VFWADD_W_VL:
14093 return RISCVISD::VFWADD_VL;
14094 case RISCVISD::FSUB_VL:
14095 case RISCVISD::VFWSUB_W_VL:
14096 return RISCVISD::VFWSUB_VL;
14097 case RISCVISD::FMUL_VL:
14098 return RISCVISD::VFWMUL_VL;
14099 case RISCVISD::VFMADD_VL:
14100 return RISCVISD::VFWMADD_VL;
14101 case RISCVISD::VFMSUB_VL:
14102 return RISCVISD::VFWMSUB_VL;
14103 case RISCVISD::VFNMADD_VL:
14104 return RISCVISD::VFWNMADD_VL;
14105 case RISCVISD::VFNMSUB_VL:
14106 return RISCVISD::VFWNMSUB_VL;
14107 default:
14108 llvm_unreachable("Unexpected opcode");
14109 }
14110 }
14111
14112 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14113 /// newOpcode(a, b).
14114 static unsigned getSUOpcode(unsigned Opcode) {
14115 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14116 "SU is only supported for MUL");
14117 return RISCVISD::VWMULSU_VL;
14118 }
14119
14120 /// Get the opcode to materialize
14121 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
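 /// For instance (illustrative, derived from the switch below), ADD_VL with a
 /// zero-extended RHS maps to VWADDU_W_VL, which consumes the already-wide LHS
 /// directly and only widens the RHS.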
14122 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14123 switch (Opcode) {
14124 case ISD::ADD:
14125 case RISCVISD::ADD_VL:
14126 case ISD::OR:
14127 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
14128 : RISCVISD::VWADDU_W_VL;
14129 case ISD::SUB:
14130 case RISCVISD::SUB_VL:
14131 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
14132 : RISCVISD::VWSUBU_W_VL;
14133 case RISCVISD::FADD_VL:
14134 return RISCVISD::VFWADD_W_VL;
14135 case RISCVISD::FSUB_VL:
14136 return RISCVISD::VFWSUB_W_VL;
14137 default:
14138 llvm_unreachable("Unexpected opcode");
14139 }
14140 }
14141
14142 using CombineToTry = std::function<std::optional<CombineResult>(
14143 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14144 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14145 const RISCVSubtarget &)>;
14146
14147 /// Check if this node needs to be fully folded or extended for all users.
14148 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14149
14150 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14151 const RISCVSubtarget &Subtarget) {
14152 unsigned Opc = OrigOperand.getOpcode();
14153 MVT VT = OrigOperand.getSimpleValueType();
14154
14155 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14156 "Unexpected Opcode");
14157
14158 // The passthru must be undef for tail agnostic.
14159 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14160 return;
14161
14162 // Get the scalar value.
14163 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14164 : OrigOperand.getOperand(1);
14165
14166 // See if we have enough sign bits or zero bits in the scalar to use a
14167 // widening opcode by splatting to smaller element size.
14168 unsigned EltBits = VT.getScalarSizeInBits();
14169 unsigned ScalarBits = Op.getValueSizeInBits();
14170 // If we're not getting all bits from the element, we need special handling.
14171 if (ScalarBits < EltBits) {
14172 // This should only occur on RV32.
14173 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
14174 !Subtarget.is64Bit() && "Unexpected splat");
14175 // vmv.v.x sign extends narrow inputs.
14176 SupportsSExt = true;
14177
14178 // If the input is positive, then sign extend is also zero extend.
14179 if (DAG.SignBitIsZero(Op))
14180 SupportsZExt = true;
14181
14182 EnforceOneUse = false;
14183 return;
14184 }
14185
14186 unsigned NarrowSize = EltBits / 2;
14187 // If the narrow type cannot be expressed with a legal VMV,
14188 // this is not a valid candidate.
14189 if (NarrowSize < 8)
14190 return;
14191
14192 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14193 SupportsSExt = true;
14194
14195 if (DAG.MaskedValueIsZero(Op,
14196 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14197 SupportsZExt = true;
14198
14199 EnforceOneUse = false;
14200 }
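 // Illustrative note (based on the checks above): for a splat of the constant
 // 3 into 64-bit elements, the scalar has only 3 significant bits and its
 // upper 32 bits are known zero, so both SupportsSExt and SupportsZExt are
 // set, matching the "splat constant 3" example in the field comments above.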
14201
14202 /// Helper method to set the various fields of this struct based on the
14203 /// type of \p Root.
14204 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14205 const RISCVSubtarget &Subtarget) {
14206 SupportsZExt = false;
14207 SupportsSExt = false;
14208 SupportsFPExt = false;
14209 EnforceOneUse = true;
14210 unsigned Opc = OrigOperand.getOpcode();
14211 // For the nodes we handle below, we end up using their inputs directly: see
14212 // getSource(). However since they either don't have a passthru or we check
14213 // that their passthru is undef, we can safely ignore their mask and VL.
14214 switch (Opc) {
14215 case ISD::ZERO_EXTEND:
14216 case ISD::SIGN_EXTEND: {
14217 MVT VT = OrigOperand.getSimpleValueType();
14218 if (!VT.isVector())
14219 break;
14220
14221 SDValue NarrowElt = OrigOperand.getOperand(0);
14222 MVT NarrowVT = NarrowElt.getSimpleValueType();
14223 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14224 if (NarrowVT.getVectorElementType() == MVT::i1)
14225 break;
14226
14227 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14228 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14229 break;
14230 }
14231 case RISCVISD::VZEXT_VL:
14232 SupportsZExt = true;
14233 break;
14234 case RISCVISD::VSEXT_VL:
14235 SupportsSExt = true;
14236 break;
14237 case RISCVISD::FP_EXTEND_VL:
14238 SupportsFPExt = true;
14239 break;
14240 case ISD::SPLAT_VECTOR:
14241 case RISCVISD::VMV_V_X_VL:
14242 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14243 break;
14244 case RISCVISD::VFMV_V_F_VL: {
14245 MVT VT = OrigOperand.getSimpleValueType();
14246
14247 if (!OrigOperand.getOperand(0).isUndef())
14248 break;
14249
14250 SDValue Op = OrigOperand.getOperand(1);
14251 if (Op.getOpcode() != ISD::FP_EXTEND)
14252 break;
14253
14254 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14255 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
14256 if (NarrowSize != ScalarBits)
14257 break;
14258
14259 SupportsFPExt = true;
14260 break;
14261 }
14262 default:
14263 break;
14264 }
14265 }
14266
14267 /// Check if \p Root supports any extension folding combines.
14268 static bool isSupportedRoot(const SDNode *Root,
14269 const RISCVSubtarget &Subtarget) {
14270 switch (Root->getOpcode()) {
14271 case ISD::ADD:
14272 case ISD::SUB:
14273 case ISD::MUL: {
14274 return Root->getValueType(0).isScalableVector();
14275 }
14276 case ISD::OR: {
14277 return Root->getValueType(0).isScalableVector() &&
14278 Root->getFlags().hasDisjoint();
14279 }
14280 // Vector Widening Integer Add/Sub/Mul Instructions
14281 case RISCVISD::ADD_VL:
14282 case RISCVISD::MUL_VL:
14283 case RISCVISD::VWADD_W_VL:
14284 case RISCVISD::VWADDU_W_VL:
14285 case RISCVISD::SUB_VL:
14286 case RISCVISD::VWSUB_W_VL:
14287 case RISCVISD::VWSUBU_W_VL:
14288 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14289 case RISCVISD::FADD_VL:
14290 case RISCVISD::FSUB_VL:
14291 case RISCVISD::FMUL_VL:
14292 case RISCVISD::VFWADD_W_VL:
14293 case RISCVISD::VFWSUB_W_VL:
14294 return true;
14295 case ISD::SHL:
14296 return Root->getValueType(0).isScalableVector() &&
14297 Subtarget.hasStdExtZvbb();
14298 case RISCVISD::SHL_VL:
14299 return Subtarget.hasStdExtZvbb();
14300 case RISCVISD::VFMADD_VL:
14301 case RISCVISD::VFMSUB_VL:
14302 case RISCVISD::VFNMADD_VL:
14303 case RISCVISD::VFNMSUB_VL:
14304 return true;
14305 default:
14306 return false;
14307 }
14308 }
14309
14310 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14311 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14312 const RISCVSubtarget &Subtarget) {
14313 assert(isSupportedRoot(Root, Subtarget) &&
14314 "Trying to build an helper with an "
14315 "unsupported root");
14316 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14318 OrigOperand = Root->getOperand(OperandIdx);
14319
14320 unsigned Opc = Root->getOpcode();
14321 switch (Opc) {
14322 // We consider
14323 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14324 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14325 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14326 case RISCVISD::VWADD_W_VL:
14327 case RISCVISD::VWADDU_W_VL:
14328 case RISCVISD::VWSUB_W_VL:
14329 case RISCVISD::VWSUBU_W_VL:
14330 case RISCVISD::VFWADD_W_VL:
14331 case RISCVISD::VFWSUB_W_VL:
14332 if (OperandIdx == 1) {
14333 SupportsZExt =
14334 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14335 SupportsSExt =
14336 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14337 SupportsFPExt =
14338 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14339 // There's no existing extension here, so we don't have to worry about
14340 // making sure it gets removed.
14341 EnforceOneUse = false;
14342 break;
14343 }
14344 [[fallthrough]];
14345 default:
14346 fillUpExtensionSupport(Root, DAG, Subtarget);
14347 break;
14348 }
14349 }
14350
14351 /// Helper function to get the Mask and VL from \p Root.
14352 static std::pair<SDValue, SDValue>
14353 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14354 const RISCVSubtarget &Subtarget) {
14355 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14356 switch (Root->getOpcode()) {
14357 case ISD::ADD:
14358 case ISD::SUB:
14359 case ISD::MUL:
14360 case ISD::OR:
14361 case ISD::SHL: {
14362 SDLoc DL(Root);
14363 MVT VT = Root->getSimpleValueType(0);
14364 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14365 }
14366 default:
14367 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14368 }
14369 }
14370
14371 /// Helper function to check if \p N is commutative with respect to the
14372 /// foldings that are supported by this class.
14373 static bool isCommutative(const SDNode *N) {
14374 switch (N->getOpcode()) {
14375 case ISD::ADD:
14376 case ISD::MUL:
14377 case ISD::OR:
14378 case RISCVISD::ADD_VL:
14379 case RISCVISD::MUL_VL:
14380 case RISCVISD::VWADD_W_VL:
14381 case RISCVISD::VWADDU_W_VL:
14382 case RISCVISD::FADD_VL:
14383 case RISCVISD::FMUL_VL:
14384 case RISCVISD::VFWADD_W_VL:
14385 case RISCVISD::VFMADD_VL:
14386 case RISCVISD::VFNMSUB_VL:
14387 case RISCVISD::VFNMADD_VL:
14388 case RISCVISD::VFMSUB_VL:
14389 return true;
14390 case ISD::SUB:
14391 case RISCVISD::SUB_VL:
14392 case RISCVISD::VWSUB_W_VL:
14393 case RISCVISD::VWSUBU_W_VL:
14394 case RISCVISD::FSUB_VL:
14395 case RISCVISD::VFWSUB_W_VL:
14396 case ISD::SHL:
14397 case RISCVISD::SHL_VL:
14398 return false;
14399 default:
14400 llvm_unreachable("Unexpected opcode");
14401 }
14402 }
14403
14404 /// Get a list of combine to try for folding extensions in \p Root.
14405 /// Note that each returned CombineToTry function doesn't actually modify
14406 /// anything. Instead they produce an optional CombineResult that if not None,
14407 /// need to be materialized for the combine to be applied.
14408 /// \see CombineResult::materialize.
14409 /// If the related CombineToTry function returns std::nullopt, that means the
14410 /// combine didn't match.
14411 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14412};
14413
14414/// Helper structure that holds all the necessary information to materialize a
14415/// combine that does some extension folding.
14416struct CombineResult {
14417 /// Opcode to be generated when materializing the combine.
14418 unsigned TargetOpcode;
14419 // No value means no extension is needed.
14420 std::optional<ExtKind> LHSExt;
14421 std::optional<ExtKind> RHSExt;
14422 /// Root of the combine.
14423 SDNode *Root;
14424 /// LHS of the TargetOpcode.
14425 NodeExtensionHelper LHS;
14426 /// RHS of the TargetOpcode.
14427 NodeExtensionHelper RHS;
14428
14429 CombineResult(unsigned TargetOpcode, SDNode *Root,
14430 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14431 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14432 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14433 LHS(LHS), RHS(RHS) {}
14434
14435 /// Return a value that uses TargetOpcode and that can be used to replace
14436 /// Root.
14437 /// The actual replacement is *not* done in that method.
14438 SDValue materialize(SelectionDAG &DAG,
14439 const RISCVSubtarget &Subtarget) const {
14440 SDValue Mask, VL, Passthru;
14441 std::tie(Mask, VL) =
14442 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14443 switch (Root->getOpcode()) {
14444 default:
14445 Passthru = Root->getOperand(2);
14446 break;
14447 case ISD::ADD:
14448 case ISD::SUB:
14449 case ISD::MUL:
14450 case ISD::OR:
14451 case ISD::SHL:
14452 Passthru = DAG.getUNDEF(Root->getValueType(0));
14453 break;
14454 }
14455 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14456 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14457 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14458 Passthru, Mask, VL);
14459 }
14460};
14461
14462/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14463/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14464/// are zext) and LHS and RHS can be folded into Root.
14465/// AllowExtMask define which form `ext` can take in this pattern.
14466///
14467/// \note If the pattern can match with both zext and sext, the returned
14468/// CombineResult will feature the zext result.
14469///
14470/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14471/// can be used to apply the pattern.
14472static std::optional<CombineResult>
14473canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14474 const NodeExtensionHelper &RHS,
14475 uint8_t AllowExtMask, SelectionDAG &DAG,
14476 const RISCVSubtarget &Subtarget) {
14477 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14478 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14479 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14480 /*RHSExt=*/{ExtKind::ZExt});
14481 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14482 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14483 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14484 /*RHSExt=*/{ExtKind::SExt});
14485 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14486 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14487 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14488 /*RHSExt=*/{ExtKind::FPExt});
14489 return std::nullopt;
14490}
14491
14492/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14493/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14494/// are zext) and LHS and RHS can be folded into Root.
14495///
14496/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14497/// can be used to apply the pattern.
14498static std::optional<CombineResult>
14499canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14500 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14501 const RISCVSubtarget &Subtarget) {
14502 return canFoldToVWWithSameExtensionImpl(
14503 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14504 Subtarget);
14505}
14506
14507/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14508///
14509/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14510/// can be used to apply the pattern.
14511static std::optional<CombineResult>
14512canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14513 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14514 const RISCVSubtarget &Subtarget) {
14515 if (RHS.SupportsFPExt)
14516 return CombineResult(
14517 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14518 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14519
14520 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14521 // sext/zext?
14522 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14523 // purposes.
14524 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14525 return CombineResult(
14526 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14527 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14528 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14529 return CombineResult(
14530 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14531 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14532 return std::nullopt;
14533}
14534
14535/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14536///
14537/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14538/// can be used to apply the pattern.
14539static std::optional<CombineResult>
14540canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14541 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14542 const RISCVSubtarget &Subtarget) {
14543 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14544 Subtarget);
14545}
14546
14547/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14548///
14549/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14550/// can be used to apply the pattern.
14551static std::optional<CombineResult>
14552canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14553 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14554 const RISCVSubtarget &Subtarget) {
14555 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14556 Subtarget);
14557}
14558
14559/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14560///
14561/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14562/// can be used to apply the pattern.
14563static std::optional<CombineResult>
14564canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14565 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14566 const RISCVSubtarget &Subtarget) {
14567 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14568 Subtarget);
14569}
14570
14571/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14572///
14573/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14574/// can be used to apply the pattern.
14575static std::optional<CombineResult>
14576canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14577 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14578 const RISCVSubtarget &Subtarget) {
14579
14580 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14581 return std::nullopt;
14582 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14583 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14584 /*RHSExt=*/{ExtKind::ZExt});
14585}
14586
14587SmallVector<NodeExtensionHelper::CombineToTry>
14588NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14589 SmallVector<CombineToTry> Strategies;
14590 switch (Root->getOpcode()) {
14591 case ISD::ADD:
14592 case ISD::SUB:
14593 case ISD::OR:
14594 case RISCVISD::ADD_VL:
14595 case RISCVISD::SUB_VL:
14596 case RISCVISD::FADD_VL:
14597 case RISCVISD::FSUB_VL:
14598 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14599 Strategies.push_back(canFoldToVWWithSameExtension);
14600 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14601 Strategies.push_back(canFoldToVW_W);
14602 break;
14603 case RISCVISD::FMUL_VL:
14604 case RISCVISD::VFMADD_VL:
14605 case RISCVISD::VFMSUB_VL:
14606 case RISCVISD::VFNMADD_VL:
14607 case RISCVISD::VFNMSUB_VL:
14608 Strategies.push_back(canFoldToVWWithSameExtension);
14609 break;
14610 case ISD::MUL:
14611 case RISCVISD::MUL_VL:
14612 // mul -> vwmul(u)
14613 Strategies.push_back(canFoldToVWWithSameExtension);
14614 // mul -> vwmulsu
14615 Strategies.push_back(canFoldToVW_SU);
14616 break;
14617 case ISD::SHL:
14618 case RISCVISD::SHL_VL:
14619 // shl -> vwsll
14620 Strategies.push_back(canFoldToVWWithZEXT);
14621 break;
14622 case RISCVISD::VWADD_W_VL:
14623 case RISCVISD::VWSUB_W_VL:
14624 // vwadd_w|vwsub_w -> vwadd|vwsub
14625 Strategies.push_back(canFoldToVWWithSEXT);
14626 break;
14627 case RISCVISD::VWADDU_W_VL:
14628 case RISCVISD::VWSUBU_W_VL:
14629 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14630 Strategies.push_back(canFoldToVWWithZEXT);
14631 break;
14632 case RISCVISD::VFWADD_W_VL:
14633 case RISCVISD::VFWSUB_W_VL:
14634 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14635 Strategies.push_back(canFoldToVWWithFPEXT);
14636 break;
14637 default:
14638 llvm_unreachable("Unexpected opcode");
14639 }
14640 return Strategies;
14641}
14642} // End anonymous namespace.
14643
14644/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
14645/// The supported combines are:
14646/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14647/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14648/// mul | mul_vl -> vwmul(u) | vwmul_su
14649/// shl | shl_vl -> vwsll
14650/// fadd_vl -> vfwadd | vfwadd_w
14651/// fsub_vl -> vfwsub | vfwsub_w
14652/// fmul_vl -> vfwmul
14653/// vwadd_w(u) -> vwadd(u)
14654/// vwsub_w(u) -> vwsub(u)
14655/// vfwadd_w -> vfwadd
14656/// vfwsub_w -> vfwsub
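/// As a concrete example of the first row (illustrative),
/// add_vl (vsext_vl a), (vsext_vl b) becomes vwadd_vl a, b once both
/// extends can be folded away.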
14657static SDValue combineOp_VLToVWOp_VL(SDNode *N,
14658 TargetLowering::DAGCombinerInfo &DCI,
14659 const RISCVSubtarget &Subtarget) {
14660 SelectionDAG &DAG = DCI.DAG;
14661 if (DCI.isBeforeLegalize())
14662 return SDValue();
14663
14664 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14665 return SDValue();
14666
14667 SmallVector<SDNode *> Worklist;
14668 SmallSet<SDNode *, 8> Inserted;
14669 Worklist.push_back(N);
14670 Inserted.insert(N);
14671 SmallVector<CombineResult> CombinesToApply;
14672
14673 while (!Worklist.empty()) {
14674 SDNode *Root = Worklist.pop_back_val();
14675
14676 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
14677 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
14678 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
14679 &Inserted](const NodeExtensionHelper &Op) {
14680 if (Op.needToPromoteOtherUsers()) {
14681 for (SDNode::use_iterator UI = Op.OrigOperand->use_begin(),
14682 UE = Op.OrigOperand->use_end();
14683 UI != UE; ++UI) {
14684 SDNode *TheUse = *UI;
14685 if (!NodeExtensionHelper::isSupportedRoot(TheUse, Subtarget))
14686 return false;
14687 // We only support the first 2 operands of FMA.
14688 if (UI.getOperandNo() >= 2)
14689 return false;
14690 if (Inserted.insert(TheUse).second)
14691 Worklist.push_back(TheUse);
14692 }
14693 }
14694 return true;
14695 };
14696
14697 // Control the compile time by limiting the number of nodes we look at in
14698 // total.
14699 if (Inserted.size() > ExtensionMaxWebSize)
14700 return SDValue();
14702 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14703 NodeExtensionHelper::getSupportedFoldings(Root);
14704
14705 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14706 bool Matched = false;
14707 for (int Attempt = 0;
14708 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
14709 ++Attempt) {
14710
14711 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14712 FoldingStrategies) {
14713 std::optional<CombineResult> Res =
14714 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
14715 if (Res) {
14716 Matched = true;
14717 CombinesToApply.push_back(*Res);
14718 // All the inputs that are extended need to be folded, otherwise
14719 // we would be leaving the old input (since it may still be used),
14720 // and the new one.
14721 if (Res->LHSExt.has_value())
14722 if (!AppendUsersIfNeeded(LHS))
14723 return SDValue();
14724 if (Res->RHSExt.has_value())
14725 if (!AppendUsersIfNeeded(RHS))
14726 return SDValue();
14727 break;
14728 }
14729 }
14730 std::swap(LHS, RHS);
14731 }
14732 // Right now we do an all or nothing approach.
14733 if (!Matched)
14734 return SDValue();
14735 }
14736 // Store the value for the replacement of the input node separately.
14737 SDValue InputRootReplacement;
14738 // We do the RAUW after we materialize all the combines, because some replaced
14739 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14740 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14741 // yet-to-be-visited CombinesToApply roots.
14742 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14743 ValuesToReplace.reserve(CombinesToApply.size());
14744 for (CombineResult Res : CombinesToApply) {
14745 SDValue NewValue = Res.materialize(DAG, Subtarget);
14746 if (!InputRootReplacement) {
14747 assert(Res.Root == N &&
14748 "First element is expected to be the current node");
14749 InputRootReplacement = NewValue;
14750 } else {
14751 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14752 }
14753 }
14754 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14755 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14756 DCI.AddToWorklist(OldNewValues.second.getNode());
14757 }
14758 return InputRootReplacement;
14759}
14760
14761// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14762// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14763// y will be the Passthru and cond will be the Mask.
14764static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14765 unsigned Opc = N->getOpcode();
14766 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14767 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14768
14769 SDValue Y = N->getOperand(0);
14770 SDValue MergeOp = N->getOperand(1);
14771 unsigned MergeOpc = MergeOp.getOpcode();
14772
14773 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14774 return SDValue();
14775
14776 SDValue X = MergeOp->getOperand(1);
14777
14778 if (!MergeOp.hasOneUse())
14779 return SDValue();
14780
14781 // Passthru should be undef
14782 SDValue Passthru = N->getOperand(2);
14783 if (!Passthru.isUndef())
14784 return SDValue();
14785
14786 // Mask should be all ones
14787 SDValue Mask = N->getOperand(3);
14788 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14789 return SDValue();
14790
14791 // False value of MergeOp should be all zeros
14792 SDValue Z = MergeOp->getOperand(2);
14793
14794 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14795 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14796 Z = Z.getOperand(1);
14797
14798 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14799 return SDValue();
14800
14801 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14802 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14803 N->getFlags());
14804}
14805
14806static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14807 TargetLowering::DAGCombinerInfo &DCI,
14808 const RISCVSubtarget &Subtarget) {
14809 [[maybe_unused]] unsigned Opc = N->getOpcode();
14810 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14811 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14812
14813 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
14814 return V;
14815
14816 return combineVWADDSUBWSelect(N, DCI.DAG);
14817}
14818
14819// Helper function for performMemPairCombine.
14820// Try to combine the memory loads/stores LSNode1 and LSNode2
14821// into a single memory pair operation.
14822static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14823 LSBaseSDNode *LSNode2, SDValue BasePtr,
14824 uint64_t Imm) {
14825 SmallPtrSet<const SDNode *, 32> Visited;
14826 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14827
14828 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14829 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14830 return SDValue();
14831
14832 MachineFunction &MF = DAG.getMachineFunction();
14833 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14834
14835 // The new operation has twice the width.
14836 MVT XLenVT = Subtarget.getXLenVT();
14837 EVT MemVT = LSNode1->getMemoryVT();
14838 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14839 MachineMemOperand *MMO = LSNode1->getMemOperand();
14840 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14841 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14842
14843 if (LSNode1->getOpcode() == ISD::LOAD) {
14844 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14845 unsigned Opcode;
14846 if (MemVT == MVT::i32)
14847 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14848 else
14849 Opcode = RISCVISD::TH_LDD;
14850
14851 SDValue Res = DAG.getMemIntrinsicNode(
14852 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14853 {LSNode1->getChain(), BasePtr,
14854 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14855 NewMemVT, NewMMO);
14856
14857 SDValue Node1 =
14858 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14859 SDValue Node2 =
14860 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14861
14862 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14863 return Node1;
14864 } else {
14865 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14866
14867 SDValue Res = DAG.getMemIntrinsicNode(
14868 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14869 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14870 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14871 NewMemVT, NewMMO);
14872
14873 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14874 return Res;
14875 }
14876}
14877
14878// Try to combine two adjacent loads/stores to a single pair instruction from
14879// the XTHeadMemPair vendor extension.
14880static SDValue performMemPairCombine(SDNode *N,
14881 TargetLowering::DAGCombinerInfo &DCI) {
14882 SelectionDAG &DAG = DCI.DAG;
14883 MachineFunction &MF = DAG.getMachineFunction();
14884 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14885
14886 // Target does not support load/store pair.
14887 if (!Subtarget.hasVendorXTHeadMemPair())
14888 return SDValue();
14889
14890 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14891 EVT MemVT = LSNode1->getMemoryVT();
14892 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14893
14894 // No volatile, indexed or atomic loads/stores.
14895 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14896 return SDValue();
14897
14898 // Function to get a base + constant representation from a memory value.
14899 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14900 if (Ptr->getOpcode() == ISD::ADD)
14901 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14902 return {Ptr->getOperand(0), C1->getZExtValue()};
14903 return {Ptr, 0};
14904 };
14905
14906 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14907
14908 SDValue Chain = N->getOperand(0);
14909 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14910 UI != UE; ++UI) {
14911 SDUse &Use = UI.getUse();
14912 if (Use.getUser() != N && Use.getResNo() == 0 &&
14913 Use.getUser()->getOpcode() == N->getOpcode()) {
14914 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14915
14916 // No volatile, indexed or atomic loads/stores.
14917 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14918 continue;
14919
14920 // Check if LSNode1 and LSNode2 have the same type and extension.
14921 if (LSNode1->getOpcode() == ISD::LOAD)
14922 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14923 cast<LoadSDNode>(LSNode1)->getExtensionType())
14924 continue;
14925
14926 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14927 continue;
14928
14929 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14930
14931 // Check if the base pointer is the same for both instructions.
14932 if (Base1 != Base2)
14933 continue;
14934
14935 // Check if the offsets match the XTHeadMemPair encoding constraints.
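 // For example (illustrative), two i32 accesses at base+8 and base+12
 // qualify: they are 4 bytes apart and 8 is a 2-bit index shifted left by 3
 // (i.e. one of 0, 8, 16, 24).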
14936 bool Valid = false;
14937 if (MemVT == MVT::i32) {
14938 // Check for adjacent i32 values and a 2-bit index.
14939 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14940 Valid = true;
14941 } else if (MemVT == MVT::i64) {
14942 // Check for adjacent i64 values and a 2-bit index.
14943 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14944 Valid = true;
14945 }
14946
14947 if (!Valid)
14948 continue;
14949
14950 // Try to combine.
14951 if (SDValue Res =
14952 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14953 return Res;
14954 }
14955 }
14956
14957 return SDValue();
14958}
14959
14960// Fold
14961// (fp_to_int (froundeven X)) -> fcvt X, rne
14962// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14963// (fp_to_int (ffloor X)) -> fcvt X, rdn
14964// (fp_to_int (fceil X)) -> fcvt X, rup
14965// (fp_to_int (fround X)) -> fcvt X, rmm
14966// (fp_to_int (frint X)) -> fcvt X
14967static SDValue performFP_TO_INTCombine(SDNode *N,
14968 TargetLowering::DAGCombinerInfo &DCI,
14969 const RISCVSubtarget &Subtarget) {
14970 SelectionDAG &DAG = DCI.DAG;
14971 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14972 MVT XLenVT = Subtarget.getXLenVT();
14973
14974 SDValue Src = N->getOperand(0);
14975
14976 // Don't do this for strict-fp Src.
14977 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14978 return SDValue();
14979
14980 // Ensure the FP type is legal.
14981 if (!TLI.isTypeLegal(Src.getValueType()))
14982 return SDValue();
14983
14984 // Don't do this for f16 with Zfhmin and not Zfh.
14985 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14986 return SDValue();
14987
14988 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14989 // If the result is invalid, we didn't find a foldable instruction.
14990 if (FRM == RISCVFPRndMode::Invalid)
14991 return SDValue();
14992
14993 SDLoc DL(N);
14994 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14995 EVT VT = N->getValueType(0);
14996
14997 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14998 MVT SrcVT = Src.getSimpleValueType();
14999 MVT SrcContainerVT = SrcVT;
15000 MVT ContainerVT = VT.getSimpleVT();
15001 SDValue XVal = Src.getOperand(0);
15002
15003 // For widening and narrowing conversions we just combine it into a
15004 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
15005 // end up getting lowered to their appropriate pseudo instructions based on
15006 // their operand types
15007 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
15008 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
15009 return SDValue();
15010
15011 // Make fixed-length vectors scalable first
15012 if (SrcVT.isFixedLengthVector()) {
15013 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
15014 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
15015 ContainerVT =
15016 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
15017 }
15018
15019 auto [Mask, VL] =
15020 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
15021
15022 SDValue FpToInt;
15023 if (FRM == RISCVFPRndMode::RTZ) {
15024 // Use the dedicated trunc static rounding mode if we're truncating so we
15025 // don't need to generate calls to fsrmi/fsrm
15026 unsigned Opc =
15027 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
15028 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15029 } else if (FRM == RISCVFPRndMode::DYN) {
15030 unsigned Opc =
15031 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
15032 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
15033 } else {
15034 unsigned Opc =
15035 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
15036 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
15037 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
15038 }
15039
15040 // If converted from fixed-length to scalable, convert back
15041 if (VT.isFixedLengthVector())
15042 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
15043
15044 return FpToInt;
15045 }
15046
15047 // Only handle XLen or i32 types. Other types narrower than XLen will
15048 // eventually be legalized to XLenVT.
15049 if (VT != MVT::i32 && VT != XLenVT)
15050 return SDValue();
15051
15052 unsigned Opc;
15053 if (VT == XLenVT)
15054 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15055 else
15056 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15057
15058 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
15059 DAG.getTargetConstant(FRM, DL, XLenVT));
15060 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
15061}
15062
15063// Fold
15064// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
15065// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
15066// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
15067// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
15068// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
15069// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
15070static SDValue performFP_TO_INT_SATCombine(SDNode *N,
15071 TargetLowering::DAGCombinerInfo &DCI,
15072 const RISCVSubtarget &Subtarget) {
15073 SelectionDAG &DAG = DCI.DAG;
15074 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15075 MVT XLenVT = Subtarget.getXLenVT();
15076
15077 // Only handle XLen types. Other types narrower than XLen will eventually be
15078 // legalized to XLenVT.
15079 EVT DstVT = N->getValueType(0);
15080 if (DstVT != XLenVT)
15081 return SDValue();
15082
15083 SDValue Src = N->getOperand(0);
15084
15085 // Don't do this for strict-fp Src.
15086 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15087 return SDValue();
15088
15089 // Ensure the FP type is also legal.
15090 if (!TLI.isTypeLegal(Src.getValueType()))
15091 return SDValue();
15092
15093 // Don't do this for f16 with Zfhmin and not Zfh.
15094 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15095 return SDValue();
15096
15097 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15098
15099 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15100 if (FRM == RISCVFPRndMode::Invalid)
15101 return SDValue();
15102
15103 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
15104
15105 unsigned Opc;
15106 if (SatVT == DstVT)
15107 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15108 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
15109 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15110 else
15111 return SDValue();
15112 // FIXME: Support other SatVTs by clamping before or after the conversion.
15113
15114 Src = Src.getOperand(0);
15115
15116 SDLoc DL(N);
15117 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
15118 DAG.getTargetConstant(FRM, DL, XLenVT));
15119
15120 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
15121 // extend.
15122 if (Opc == RISCVISD::FCVT_WU_RV64)
15123 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
15124
15125 // RISC-V FP-to-int conversions saturate to the destination register size, but
15126 // don't produce 0 for nan.
15127 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
15128 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
15129}
15130
15131// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
15132// smaller than XLenVT.
15133static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
15134 const RISCVSubtarget &Subtarget) {
15135 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
15136
15137 SDValue Src = N->getOperand(0);
15138 if (Src.getOpcode() != ISD::BSWAP)
15139 return SDValue();
15140
15141 EVT VT = N->getValueType(0);
15142 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
15143 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
15144 return SDValue();
15145
15146 SDLoc DL(N);
15147 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
15148}
15149
15150// Convert from one FMA opcode to another based on whether we are negating the
15151// multiply result and/or the accumulator.
15152// NOTE: Only supports RVV operations with VL.
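// For example (illustrative, per the tables below): negating only the
// multiply result turns VFMADD_VL ((a * b) + c) into VFNMSUB_VL
// (-(a * b) + c), while negating only the accumulator turns VFMADD_VL into
// VFMSUB_VL ((a * b) - c).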
15153static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
15154 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
15155 if (NegMul) {
15156 // clang-format off
15157 switch (Opcode) {
15158 default: llvm_unreachable("Unexpected opcode");
15159 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15160 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15161 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15162 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15163 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15164 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15165 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15166 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15167 }
15168 // clang-format on
15169 }
15170
15171 // Negating the accumulator changes ADD<->SUB.
15172 if (NegAcc) {
15173 // clang-format off
15174 switch (Opcode) {
15175 default: llvm_unreachable("Unexpected opcode");
15176 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15177 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15178 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15179 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15180 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15181 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15182 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15183 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15184 }
15185 // clang-format on
15186 }
15187
15188 return Opcode;
15189}
15190
15191static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15192 // Fold FNEG_VL into FMA opcodes.
15193 // The first operand of strict-fp is chain.
15194 unsigned Offset = N->isTargetStrictFPOpcode();
15195 SDValue A = N->getOperand(0 + Offset);
15196 SDValue B = N->getOperand(1 + Offset);
15197 SDValue C = N->getOperand(2 + Offset);
15198 SDValue Mask = N->getOperand(3 + Offset);
15199 SDValue VL = N->getOperand(4 + Offset);
15200
15201 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15202 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15203 V.getOperand(2) == VL) {
15204 // Return the negated input.
15205 V = V.getOperand(0);
15206 return true;
15207 }
15208
15209 return false;
15210 };
15211
15212 bool NegA = invertIfNegative(A);
15213 bool NegB = invertIfNegative(B);
15214 bool NegC = invertIfNegative(C);
15215
15216 // If no operands are negated, we're done.
15217 if (!NegA && !NegB && !NegC)
15218 return SDValue();
15219
15220 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15221 if (N->isTargetStrictFPOpcode())
15222 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15223 {N->getOperand(0), A, B, C, Mask, VL});
15224 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15225 VL);
15226}
15227
15228static SDValue performVFMADD_VLCombine(SDNode *N,
15229 TargetLowering::DAGCombinerInfo &DCI,
15230 const RISCVSubtarget &Subtarget) {
15231 SelectionDAG &DAG = DCI.DAG;
15232
15233 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15234 return V;
15235
15236 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
15237 !Subtarget.hasVInstructionsF16())
15238 return SDValue();
15239
15240 // FIXME: Ignore strict opcodes for now.
15241 if (N->isTargetStrictFPOpcode())
15242 return SDValue();
15243
15244 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
15245}
15246
15247static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15248 const RISCVSubtarget &Subtarget) {
15249 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15250
15251 EVT VT = N->getValueType(0);
15252
15253 if (VT != Subtarget.getXLenVT())
15254 return SDValue();
15255
15256 if (!isa<ConstantSDNode>(N->getOperand(1)))
15257 return SDValue();
15258 uint64_t ShAmt = N->getConstantOperandVal(1);
15259
15260 SDValue N0 = N->getOperand(0);
15261
15262 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
15263 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
15264 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
15265 unsigned ExtSize =
15266 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
15267 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
15268 N0.getOperand(0).hasOneUse() &&
15269 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15270 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15271 if (LShAmt < ExtSize) {
15272 unsigned Size = VT.getSizeInBits();
15273 SDLoc ShlDL(N0.getOperand(0));
15274 SDValue Shl =
15275 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
15276 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
15277 SDLoc DL(N);
15278 return DAG.getNode(ISD::SRA, DL, VT, Shl,
15279 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
15280 }
15281 }
15282 }
15283
15284 if (ShAmt > 32 || VT != MVT::i64)
15285 return SDValue();
15286
15287 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15288 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15289 //
15290 // Also try these folds where an add or sub is in the middle.
15291 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15292 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
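 // Worked example (illustrative): with C = 8, (sra (shl X, 32), 24) sign
 // extends the low 32 bits of X and shifts the result left by 8, which is
 // exactly (shl (sext_inreg X, i32), 8).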
15293 SDValue Shl;
15294 ConstantSDNode *AddC = nullptr;
15295
15296 // We might have an ADD or SUB between the SRA and SHL.
15297 bool IsAdd = N0.getOpcode() == ISD::ADD;
15298 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15299 // Other operand needs to be a constant we can modify.
15300 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15301 if (!AddC)
15302 return SDValue();
15303
15304 // AddC needs to have at least 32 trailing zeros.
15305 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
15306 return SDValue();
15307
15308 // All users should be a shift by constant less than or equal to 32. This
15309 // ensures we'll do this optimization for each of them to produce an
15310 // add/sub+sext_inreg they can all share.
15311 for (SDNode *U : N0->uses()) {
15312 if (U->getOpcode() != ISD::SRA ||
15313 !isa<ConstantSDNode>(U->getOperand(1)) ||
15314 U->getConstantOperandVal(1) > 32)
15315 return SDValue();
15316 }
15317
15318 Shl = N0.getOperand(IsAdd ? 0 : 1);
15319 } else {
15320 // Not an ADD or SUB.
15321 Shl = N0;
15322 }
15323
15324 // Look for a shift left by 32.
15325 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15326 Shl.getConstantOperandVal(1) != 32)
15327 return SDValue();
15328
15329 // If we didn't look through an add/sub, then the shl should have one use.
15330 // If we did look through an add/sub, the sext_inreg we create is free so
15331 // we're only creating 2 new instructions. It's enough to only remove the
15332 // original sra+add/sub.
15333 if (!AddC && !Shl.hasOneUse())
15334 return SDValue();
15335
15336 SDLoc DL(N);
15337 SDValue In = Shl.getOperand(0);
15338
15339 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15340 // constant.
15341 if (AddC) {
15342 SDValue ShiftedAddC =
15343 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
15344 if (IsAdd)
15345 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15346 else
15347 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15348 }
15349
15350 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15351 DAG.getValueType(MVT::i32));
15352 if (ShAmt == 32)
15353 return SExt;
15354
15355 return DAG.getNode(
15356 ISD::SHL, DL, MVT::i64, SExt,
15357 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15358}
15359
15360 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15361 // the result is used as the condition of a br_cc or select_cc we can invert,
15362// inverting the setcc is free, and Z is 0/1. Caller will invert the
15363// br_cc/select_cc.
15364static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15365 bool IsAnd = Cond.getOpcode() == ISD::AND;
15366 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15367 return SDValue();
15368
15369 if (!Cond.hasOneUse())
15370 return SDValue();
15371
15372 SDValue Setcc = Cond.getOperand(0);
15373 SDValue Xor = Cond.getOperand(1);
15374 // Canonicalize setcc to LHS.
15375 if (Setcc.getOpcode() != ISD::SETCC)
15376 std::swap(Setcc, Xor);
15377 // LHS should be a setcc and RHS should be an xor.
15378 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15379 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15380 return SDValue();
15381
15382 // If the condition is an And, SimplifyDemandedBits may have changed
15383 // (xor Z, 1) to (not Z).
15384 SDValue Xor1 = Xor.getOperand(1);
15385 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15386 return SDValue();
15387
15388 EVT VT = Cond.getValueType();
15389 SDValue Xor0 = Xor.getOperand(0);
15390
15391 // The LHS of the xor needs to be 0/1.
15392 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15393 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15394 return SDValue();
15395
15396 // We can only invert integer setccs.
15397 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15398 if (!SetCCOpVT.isScalarInteger())
15399 return SDValue();
15400
15401 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15402 if (ISD::isIntEqualitySetCC(CCVal)) {
15403 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15404 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15405 Setcc.getOperand(1), CCVal);
15406 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15407 // Invert (setlt 0, X) by converting to (setlt X, 1).
15408 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15409 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15410 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15411 // Invert (setlt X, 1) by converting to (setlt 0, X).
15412 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15413 DAG.getConstant(0, SDLoc(Setcc), VT),
15414 Setcc.getOperand(0), CCVal);
15415 } else
15416 return SDValue();
15417
15418 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15419 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15420}
15421
15422 // Perform common combines for BR_CC and SELECT_CC conditions.
15423static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15424 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15425 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15426
15427 // Since an arithmetic right shift always preserves the sign,
15428 // the shift can be omitted.
15429 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15430 // setge (sra X, N), 0 -> setge X, 0
15431 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15432 LHS.getOpcode() == ISD::SRA) {
15433 LHS = LHS.getOperand(0);
15434 return true;
15435 }
15436
15437 if (!ISD::isIntEqualitySetCC(CCVal))
15438 return false;
15439
15440 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15441 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15442 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15443 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15444 // If we're looking for eq 0 instead of ne 0, we need to invert the
15445 // condition.
15446 bool Invert = CCVal == ISD::SETEQ;
15447 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15448 if (Invert)
15449 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15450
15451 RHS = LHS.getOperand(1);
15452 LHS = LHS.getOperand(0);
15453 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15454
15455 CC = DAG.getCondCode(CCVal);
15456 return true;
15457 }
15458
15459 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15460 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15461 RHS = LHS.getOperand(1);
15462 LHS = LHS.getOperand(0);
15463 return true;
15464 }
15465
15466 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
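 // For instance (illustrative), on RV64 with C = 3: ((srl (and X, 8), 3), 0, ne)
 // becomes ((shl X, 60), 0, lt), i.e. bit 3 of X is moved into the sign bit
 // and tested there.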
15467 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15468 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15469 SDValue LHS0 = LHS.getOperand(0);
15470 if (LHS0.getOpcode() == ISD::AND &&
15471 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15472 uint64_t Mask = LHS0.getConstantOperandVal(1);
15473 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15474 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15475 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15476 CC = DAG.getCondCode(CCVal);
15477
15478 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15479 LHS = LHS0.getOperand(0);
15480 if (ShAmt != 0)
15481 LHS =
15482 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15483 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15484 return true;
15485 }
15486 }
15487 }
15488
15489 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15490 // This can occur when legalizing some floating point comparisons.
15491 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15492 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15493 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15494 CC = DAG.getCondCode(CCVal);
15495 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15496 return true;
15497 }
15498
15499 if (isNullConstant(RHS)) {
15500 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15501 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15502 CC = DAG.getCondCode(CCVal);
15503 LHS = NewCond;
15504 return true;
15505 }
15506 }
15507
15508 return false;
15509}
15510
15511// Fold
15512// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15513// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15514// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15515// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
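// This works because whenever the original select would have produced plain
// Y, the inner select instead yields the identity value (0 for add/or/xor,
// and a literal 0 on the RHS of sub/shifts), so the outer operation leaves Y
// unchanged.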
15516static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15517 SDValue TrueVal, SDValue FalseVal,
15518 bool Swapped) {
15519 bool Commutative = true;
15520 unsigned Opc = TrueVal.getOpcode();
15521 switch (Opc) {
15522 default:
15523 return SDValue();
15524 case ISD::SHL:
15525 case ISD::SRA:
15526 case ISD::SRL:
15527 case ISD::SUB:
15528 Commutative = false;
15529 break;
15530 case ISD::ADD:
15531 case ISD::OR:
15532 case ISD::XOR:
15533 break;
15534 }
15535
15536 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15537 return SDValue();
15538
15539 unsigned OpToFold;
15540 if (FalseVal == TrueVal.getOperand(0))
15541 OpToFold = 0;
15542 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15543 OpToFold = 1;
15544 else
15545 return SDValue();
15546
15547 EVT VT = N->getValueType(0);
15548 SDLoc DL(N);
15549 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15550 EVT OtherOpVT = OtherOp.getValueType();
15551 SDValue IdentityOperand =
15552 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15553 if (!Commutative)
15554 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15555 assert(IdentityOperand && "No identity operand!");
15556
15557 if (Swapped)
15558 std::swap(OtherOp, IdentityOperand);
15559 SDValue NewSel =
15560 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15561 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15562}
15563
15564 // This tries to get rid of `select` and `icmp` that are being used to handle
15565 // targets that do not support `cttz(0)`/`ctlz(0)`.
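// For example (illustrative), (select (setcc X, 0, eq), 0, (cttz X)) on an
// i32 value is rewritten to (and (cttz X), 31): cttz returns the bit width
// (32) for a zero input and 32 & 31 == 0, so the guarding select becomes
// redundant.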
15566static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15567 SDValue Cond = N->getOperand(0);
15568
15569 // This represents either CTTZ or CTLZ instruction.
15570 SDValue CountZeroes;
15571
15572 SDValue ValOnZero;
15573
15574 if (Cond.getOpcode() != ISD::SETCC)
15575 return SDValue();
15576
15577 if (!isNullConstant(Cond->getOperand(1)))
15578 return SDValue();
15579
15580 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15581 if (CCVal == ISD::CondCode::SETEQ) {
15582 CountZeroes = N->getOperand(2);
15583 ValOnZero = N->getOperand(1);
15584 } else if (CCVal == ISD::CondCode::SETNE) {
15585 CountZeroes = N->getOperand(1);
15586 ValOnZero = N->getOperand(2);
15587 } else {
15588 return SDValue();
15589 }
15590
15591 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15592 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15593 CountZeroes = CountZeroes.getOperand(0);
15594
15595 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15596 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15597 CountZeroes.getOpcode() != ISD::CTLZ &&
15598 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15599 return SDValue();
15600
15601 if (!isNullConstant(ValOnZero))
15602 return SDValue();
15603
15604 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15605 if (Cond->getOperand(0) != CountZeroesArgument)
15606 return SDValue();
15607
15608 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15609 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15610 CountZeroes.getValueType(), CountZeroesArgument);
15611 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15612 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15613 CountZeroes.getValueType(), CountZeroesArgument);
15614 }
15615
15616 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15617 SDValue BitWidthMinusOne =
15618 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15619
15620 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15621 CountZeroes, BitWidthMinusOne);
15622 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15623}
15624
15625static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15626 const RISCVSubtarget &Subtarget) {
15627 SDValue Cond = N->getOperand(0);
15628 SDValue True = N->getOperand(1);
15629 SDValue False = N->getOperand(2);
15630 SDLoc DL(N);
15631 EVT VT = N->getValueType(0);
15632 EVT CondVT = Cond.getValueType();
15633
15634 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15635 return SDValue();
15636
15637 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15638 // BEXTI, where C is power of 2.
15639 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15640 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15641 SDValue LHS = Cond.getOperand(0);
15642 SDValue RHS = Cond.getOperand(1);
15643 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15644 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15645 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15646 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15647 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15648 return DAG.getSelect(DL, VT,
15649 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15650 False, True);
15651 }
15652 }
15653 return SDValue();
15654}
15655
15656static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15657 const RISCVSubtarget &Subtarget) {
15658 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15659 return Folded;
15660
15661 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15662 return V;
15663
15664 if (Subtarget.hasConditionalMoveFusion())
15665 return SDValue();
15666
15667 SDValue TrueVal = N->getOperand(1);
15668 SDValue FalseVal = N->getOperand(2);
15669 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15670 return V;
15671 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15672}
15673
15674/// If we have a build_vector where each lane is binop X, C, where C
15675/// is a constant (but not necessarily the same constant on all lanes),
15676/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15677/// We assume that materializing a constant build vector will be no more
15678/// expensive than performing O(n) binops.
15679static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15680 const RISCVSubtarget &Subtarget,
15681 const RISCVTargetLowering &TLI) {
15682 SDLoc DL(N);
15683 EVT VT = N->getValueType(0);
15684
15685 assert(!VT.isScalableVector() && "unexpected build vector");
15686
15687 if (VT.getVectorNumElements() == 1)
15688 return SDValue();
15689
15690 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15691 if (!TLI.isBinOp(Opcode))
15692 return SDValue();
15693
15694 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15695 return SDValue();
15696
15697 // This BUILD_VECTOR involves an implicit truncation, and sinking
15698 // truncates through binops is non-trivial.
15699 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15700 return SDValue();
15701
15702 SmallVector<SDValue> LHSOps;
15703 SmallVector<SDValue> RHSOps;
15704 for (SDValue Op : N->ops()) {
15705 if (Op.isUndef()) {
15706 // We can't form a divide or remainder from undef.
15707 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15708 return SDValue();
15709
15710 LHSOps.push_back(Op);
15711 RHSOps.push_back(Op);
15712 continue;
15713 }
15714
15715 // TODO: We can handle operations which have a neutral rhs value
15716 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15717 // of profit in a more explicit manner.
15718 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15719 return SDValue();
15720
15721 LHSOps.push_back(Op.getOperand(0));
15722 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15723 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15724 return SDValue();
15725 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15726 // have different LHS and RHS types.
15727 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15728 return SDValue();
15729
15730 RHSOps.push_back(Op.getOperand(1));
15731 }
15732
15733 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15734 DAG.getBuildVector(VT, DL, RHSOps));
15735}
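// Illustrative example of the transform above (a sketch, not from the
// upstream source):
//   (build_vector (add x0, 1), (add x1, 2), (add x2, 3), (add x3, 4))
//     -> (add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4))
// One vector add plus a materialized constant vector replaces four scalar
// adds feeding the build_vector.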
15736
15737static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15738 const RISCVSubtarget &Subtarget,
15739 const RISCVTargetLowering &TLI) {
15740 SDValue InVec = N->getOperand(0);
15741 SDValue InVal = N->getOperand(1);
15742 SDValue EltNo = N->getOperand(2);
15743 SDLoc DL(N);
15744
15745 EVT VT = InVec.getValueType();
15746 if (VT.isScalableVector())
15747 return SDValue();
15748
15749 if (!InVec.hasOneUse())
15750 return SDValue();
15751
15752 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15753 // move the insert_vector_elts into the arms of the binop. Note that
15754 // the new RHS must be a constant.
15755 const unsigned InVecOpcode = InVec->getOpcode();
15756 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15757 InVal.hasOneUse()) {
15758 SDValue InVecLHS = InVec->getOperand(0);
15759 SDValue InVecRHS = InVec->getOperand(1);
15760 SDValue InValLHS = InVal->getOperand(0);
15761 SDValue InValRHS = InVal->getOperand(1);
15762
15763 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
15764 return SDValue();
15765 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15766 return SDValue();
15767 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15768 // have different LHS and RHS types.
15769 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15770 return SDValue();
15771 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15772 InVecLHS, InValLHS, EltNo);
15773 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15774 InVecRHS, InValRHS, EltNo);
15775 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15776 }
15777
15778 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15779 // move the insert_vector_elt to the source operand of the concat_vector.
15780 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15781 return SDValue();
15782
15783 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15784 if (!IndexC)
15785 return SDValue();
15786 unsigned Elt = IndexC->getZExtValue();
15787
15788 EVT ConcatVT = InVec.getOperand(0).getValueType();
15789 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15790 return SDValue();
15791 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15792 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15793
15794 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15795 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15796 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15797 ConcatOp, InVal, NewIdx);
15798
15799 SmallVector<SDValue> ConcatOps;
15800 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15801 ConcatOps[ConcatOpIdx] = ConcatOp;
15802 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15803}
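// Illustrative examples of the two patterns handled above (sketches, not from
// the upstream source):
//   (insert_vector_elt (add v, vC), (add s, 7), idx)
//     -> (add (insert_vector_elt v, s, idx), (insert_vector_elt vC, 7, idx))
//   (insert_vector_elt (concat_vectors a, b), s, 5), with v4i32 a and b,
//     -> (concat_vectors a, (insert_vector_elt b, s, 1))
// since 5 / 4 == 1 selects operand b and 5 % 4 == 1 is the lane within it.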
15804
15805// If we're concatenating a series of vector loads like
15806// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15807// Then we can turn this into a strided load by widening the vector elements
15808// vlse32 p, stride=n
15809static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15810 const RISCVSubtarget &Subtarget,
15811 const RISCVTargetLowering &TLI) {
15812 SDLoc DL(N);
15813 EVT VT = N->getValueType(0);
15814
15815 // Only perform this combine on legal MVTs.
15816 if (!TLI.isTypeLegal(VT))
15817 return SDValue();
15818
15819 // TODO: Potentially extend this to scalable vectors
15820 if (VT.isScalableVector())
15821 return SDValue();
15822
15823 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15824 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15825 !SDValue(BaseLd, 0).hasOneUse())
15826 return SDValue();
15827
15828 EVT BaseLdVT = BaseLd->getValueType(0);
15829
15830 // Go through the loads and check that they're strided
15831 SmallVector<LoadSDNode *> Lds;
15832 Lds.push_back(BaseLd);
15833 Align Align = BaseLd->getAlign();
15834 for (SDValue Op : N->ops().drop_front()) {
15835 auto *Ld = dyn_cast<LoadSDNode>(Op);
15836 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15837 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15838 Ld->getValueType(0) != BaseLdVT)
15839 return SDValue();
15840
15841 Lds.push_back(Ld);
15842
15843 // The common alignment is the most restrictive (smallest) of all the loads
15844 Align = std::min(Align, Ld->getAlign());
15845 }
15846
15847 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15848 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15849 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15850 // If the load ptrs can be decomposed into a common (Base + Index) with a
15851 // common constant stride, then return the constant stride.
15852 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15853 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15854 if (BIO1.equalBaseIndex(BIO2, DAG))
15855 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15856
15857 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15858 SDValue P1 = Ld1->getBasePtr();
15859 SDValue P2 = Ld2->getBasePtr();
15860 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15861 return {{P2.getOperand(1), false}};
15862 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15863 return {{P1.getOperand(1), true}};
15864
15865 return std::nullopt;
15866 };
15867
15868 // Get the distance between the first and second loads
15869 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15870 if (!BaseDiff)
15871 return SDValue();
15872
15873 // Check all the loads are the same distance apart
15874 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15875 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15876 return SDValue();
15877
15878 // TODO: At this point, we've successfully matched a generalized gather
15879 // load. Maybe we should emit that, and then move the specialized
15880 // matchers above and below into a DAG combine?
15881
15882 // Get the widened scalar type, e.g. v4i8 -> i32
15883 unsigned WideScalarBitWidth =
15884 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15885 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15886
15887 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
15888 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15889 if (!TLI.isTypeLegal(WideVecVT))
15890 return SDValue();
15891
15892 // Check that the operation is legal
15893 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15894 return SDValue();
15895
15896 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15897 SDValue Stride =
15898 std::holds_alternative<SDValue>(StrideVariant)
15899 ? std::get<SDValue>(StrideVariant)
15900 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
15901 Lds[0]->getOffset().getValueType());
15902 if (MustNegateStride)
15903 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15904
15905 SDValue AllOneMask =
15906 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15907 DAG.getConstant(1, DL, MVT::i1));
15908
15909 uint64_t MemSize;
15910 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15911 ConstStride && ConstStride->getSExtValue() >= 0)
15912 // total size = (elsize * n) + (stride - elsize) * (n-1)
15913 // = elsize + stride * (n-1)
15914 MemSize = WideScalarVT.getSizeInBits() +
15915 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15916 else
15917 // If Stride isn't constant, then we can't know how much it will load
15918 MemSize = MemoryLocation::UnknownSize;
15919
15920 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15921 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15922 Align);
15923
15924 SDValue StridedLoad = DAG.getStridedLoadVP(
15925 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
15926 AllOneMask,
15927 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
15928
15929 for (SDValue Ld : N->ops())
15930 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15931
15932 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15933}
15934
15935static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15936 const RISCVSubtarget &Subtarget) {
15937
15938 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15939
15940 if (N->getValueType(0).isFixedLengthVector())
15941 return SDValue();
15942
15943 SDValue Addend = N->getOperand(0);
15944 SDValue MulOp = N->getOperand(1);
15945
15946 if (N->getOpcode() == RISCVISD::ADD_VL) {
15947 SDValue AddPassthruOp = N->getOperand(2);
15948 if (!AddPassthruOp.isUndef())
15949 return SDValue();
15950 }
15951
15952 auto IsVWMulOpc = [](unsigned Opc) {
15953 switch (Opc) {
15954 case RISCVISD::VWMUL_VL:
15955 case RISCVISD::VWMULU_VL:
15956 case RISCVISD::VWMULSU_VL:
15957 return true;
15958 default:
15959 return false;
15960 }
15961 };
15962
15963 if (!IsVWMulOpc(MulOp.getOpcode()))
15964 std::swap(Addend, MulOp);
15965
15966 if (!IsVWMulOpc(MulOp.getOpcode()))
15967 return SDValue();
15968
15969 SDValue MulPassthruOp = MulOp.getOperand(2);
15970
15971 if (!MulPassthruOp.isUndef())
15972 return SDValue();
15973
15974 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15975 const RISCVSubtarget &Subtarget) {
15976 if (N->getOpcode() == ISD::ADD) {
15977 SDLoc DL(N);
15978 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15979 Subtarget);
15980 }
15981 return std::make_pair(N->getOperand(3), N->getOperand(4));
15982 }(N, DAG, Subtarget);
15983
15984 SDValue MulMask = MulOp.getOperand(3);
15985 SDValue MulVL = MulOp.getOperand(4);
15986
15987 if (AddMask != MulMask || AddVL != MulVL)
15988 return SDValue();
15989
15990 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15991 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15992 "Unexpected opcode after VWMACC_VL");
15993 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15994 "Unexpected opcode after VWMACC_VL!");
15995 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15996 "Unexpected opcode after VWMUL_VL!");
15997 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15998 "Unexpected opcode after VWMUL_VL!");
15999
16000 SDLoc DL(N);
16001 EVT VT = N->getValueType(0);
16002 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
16003 AddVL};
16004 return DAG.getNode(Opc, DL, VT, Ops);
16005}
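// Illustrative example (a sketch, not from the upstream source), assuming the
// masks and VLs of the add and the multiply match:
//   (add_vl acc, (vwmul_vl x, y)) -> (vwmacc_vl x, y, acc)
// and likewise vwmulu_vl -> vwmaccu_vl and vwmulsu_vl -> vwmaccsu_vl, which
// is exactly the opcode spacing the static_asserts above guarantee.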
16006
16007static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
16008 ISD::MemIndexType &IndexType,
16009 RISCVTargetLowering::DAGCombinerInfo &DCI) {
16010 if (!DCI.isBeforeLegalize())
16011 return false;
16012
16013 SelectionDAG &DAG = DCI.DAG;
16014 const MVT XLenVT =
16015 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
16016
16017 const EVT IndexVT = Index.getValueType();
16018
16019 // RISC-V indexed loads only support the "unsigned unscaled" addressing
16020 // mode, so anything else must be manually legalized.
16021 if (!isIndexTypeSigned(IndexType))
16022 return false;
16023
16024 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
16025 // Any index legalization should first promote to XLenVT, so we don't lose
16026 // bits when scaling. This may create an illegal index type so we let
16027 // LLVM's legalization take care of the splitting.
16028 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
16029 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
16030 IndexVT.changeVectorElementType(XLenVT), Index);
16031 }
16032 IndexType = ISD::UNSIGNED_SCALED;
16033 return true;
16034}
16035
16036/// Match the index vector of a scatter or gather node as the shuffle mask
16037/// which performs the rearrangement if possible. Will only match if
16038/// all lanes are touched, and thus replacing the scatter or gather with
16039/// a unit strided access and shuffle is legal.
16040static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
16041 SmallVector<int> &ShuffleMask) {
16042 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16043 return false;
16044 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
16045 return false;
16046
16047 const unsigned ElementSize = VT.getScalarStoreSize();
16048 const unsigned NumElems = VT.getVectorNumElements();
16049
16050 // Create the shuffle mask and check all bits active
16051 assert(ShuffleMask.empty());
16052 BitVector ActiveLanes(NumElems);
16053 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16054 // TODO: We've found an active bit of UB, and could be
16055 // more aggressive here if desired.
16056 if (Index->getOperand(i)->isUndef())
16057 return false;
16058 uint64_t C = Index->getConstantOperandVal(i);
16059 if (C % ElementSize != 0)
16060 return false;
16061 C = C / ElementSize;
16062 if (C >= NumElems)
16063 return false;
16064 ShuffleMask.push_back(C);
16065 ActiveLanes.set(C);
16066 }
16067 return ActiveLanes.all();
16068}
16069
16070/// Match the index of a gather or scatter operation as an operation
16071/// with twice the element width and half the number of elements. This is
16072/// generally profitable (if legal) because these operations are linear
16073/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
16074/// come out ahead.
16075static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
16076 Align BaseAlign, const RISCVSubtarget &ST) {
16077 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16078 return false;
16079 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
16080 return false;
16081
16082 // Attempt a doubling. If we can use an element type 4x or 8x in
16083 // size, this will happen via multiple iterations of the transform.
16084 const unsigned NumElems = VT.getVectorNumElements();
16085 if (NumElems % 2 != 0)
16086 return false;
16087
16088 const unsigned ElementSize = VT.getScalarStoreSize();
16089 const unsigned WiderElementSize = ElementSize * 2;
16090 if (WiderElementSize > ST.getELen()/8)
16091 return false;
16092
16093 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16094 return false;
16095
16096 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16097 // TODO: We've found an active bit of UB, and could be
16098 // more aggressive here if desired.
16099 if (Index->getOperand(i)->isUndef())
16100 return false;
16101 // TODO: This offset check is too strict if we support fully
16102 // misaligned memory operations.
16103 uint64_t C = Index->getConstantOperandVal(i);
16104 if (i % 2 == 0) {
16105 if (C % WiderElementSize != 0)
16106 return false;
16107 continue;
16108 }
16109 uint64_t Last = Index->getConstantOperandVal(i-1);
16110 if (C != Last + ElementSize)
16111 return false;
16112 }
16113 return true;
16114}
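// Illustrative example (a sketch, not from the upstream source): a v8i16
// gather with byte offsets (0, 2, 8, 10, 16, 18, 24, 26) places each odd lane
// immediately after its even neighbour, so it can be treated as a v4i32
// gather with offsets (0, 8, 16, 24), halving the number of indexed accesses.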
16115
16116// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16117// This benefits cases where X and Y are both the same low-precision vector
16118// value type. Since the truncate would be lowered into n levels of
16119// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
16120// such a pattern would be expanded into a series of "vsetvli"
16121// and "vnsrl" instructions later to reach this point.
16122static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
16123 SDValue Mask = N->getOperand(1);
16124 SDValue VL = N->getOperand(2);
16125
16126 bool IsVLMAX = isAllOnesConstant(VL) ||
16127 (isa<RegisterSDNode>(VL) &&
16128 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16129 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
16130 Mask.getOperand(0) != VL)
16131 return SDValue();
16132
16133 auto IsTruncNode = [&](SDValue V) {
16134 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16135 V.getOperand(1) == Mask && V.getOperand(2) == VL;
16136 };
16137
16138 SDValue Op = N->getOperand(0);
16139
16140 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16141 // to distinguish such pattern.
16142 while (IsTruncNode(Op)) {
16143 if (!Op.hasOneUse())
16144 return SDValue();
16145 Op = Op.getOperand(0);
16146 }
16147
16148 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
16149 return SDValue();
16150
16151 SDValue N0 = Op.getOperand(0);
16152 SDValue N1 = Op.getOperand(1);
16153 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
16154 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
16155 return SDValue();
16156
16157 SDValue N00 = N0.getOperand(0);
16158 SDValue N10 = N1.getOperand(0);
16159 if (!N00.getValueType().isVector() ||
16160 N00.getValueType() != N10.getValueType() ||
16161 N->getValueType(0) != N10.getValueType())
16162 return SDValue();
16163
16164 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16165 SDValue SMin =
16166 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16167 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16168 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16169}
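// Illustrative example (a sketch, not from the upstream source), for v4i8 X
// and Y:
//   trunc (sra (sext X to v4i16), (zext Y to v4i16)) -> sra X, (smin Y, 7)
// Shifting the sign-extended value by 7 or more and then truncating produces
// pure sign bits, which is exactly what (sra X, 7) yields, so clamping the
// shift amount preserves the result while avoiding the widen/narrow sequence.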
16170
16171// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
16172// maximum value for the truncated type.
16173// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
16174// is the signed maximum value for the truncated type and C2 is the signed
16175// minimum value.
16176static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
16177 const RISCVSubtarget &Subtarget) {
16178 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
16179
16180 MVT VT = N->getSimpleValueType(0);
16181
16182 SDValue Mask = N->getOperand(1);
16183 SDValue VL = N->getOperand(2);
16184
16185 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
16186 APInt &SplatVal) {
16187 if (V.getOpcode() != Opc &&
16188 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
16189 V.getOperand(3) == Mask && V.getOperand(4) == VL))
16190 return SDValue();
16191
16192 SDValue Op = V.getOperand(1);
16193
16194 // Peek through conversion between fixed and scalable vectors.
16195 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
16196 isNullConstant(Op.getOperand(2)) &&
16197 Op.getOperand(1).getValueType().isFixedLengthVector() &&
16198 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16199 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
16200 isNullConstant(Op.getOperand(1).getOperand(1)))
16201 Op = Op.getOperand(1).getOperand(0);
16202
16203 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
16204 return V.getOperand(0);
16205
16206 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
16207 Op.getOperand(2) == VL) {
16208 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
16209 SplatVal =
16210 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
16211 return V.getOperand(0);
16212 }
16213 }
16214
16215 return SDValue();
16216 };
16217
16218 SDLoc DL(N);
16219
16220 auto DetectUSatPattern = [&](SDValue V) {
16221 APInt LoC, HiC;
16222
16223 // Simple case, V is a UMIN.
16224 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16225 if (HiC.isMask(VT.getScalarSizeInBits()))
16226 return UMinOp;
16227
16228 // If we have an SMAX that removes negative numbers first, then we can match
16229 // SMIN instead of UMIN.
16230 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16231 if (SDValue SMaxOp =
16232 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16233 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16234 return SMinOp;
16235
16236 // If we have an SMIN before an SMAX and the SMAX constant is less than or
16237 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16238 // first.
16239 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16240 if (SDValue SMinOp =
16241 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16242 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16243 HiC.uge(LoC))
16244 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
16245 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16246 Mask, VL);
16247
16248 return SDValue();
16249 };
16250
16251 auto DetectSSatPattern = [&](SDValue V) {
16252 unsigned NumDstBits = VT.getScalarSizeInBits();
16253 unsigned NumSrcBits = V.getScalarValueSizeInBits();
16254 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16255 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16256
16257 APInt HiC, LoC;
16258 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16259 if (SDValue SMaxOp =
16260 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16261 if (HiC == SignedMax && LoC == SignedMin)
16262 return SMaxOp;
16263
16264 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16265 if (SDValue SMinOp =
16266 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16267 if (HiC == SignedMax && LoC == SignedMin)
16268 return SMinOp;
16269
16270 return SDValue();
16271 };
16272
16273 SDValue Src = N->getOperand(0);
16274
16275 // Look through multiple layers of truncates.
16276 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16277 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
16278 Src.hasOneUse())
16279 Src = Src.getOperand(0);
16280
16281 SDValue Val;
16282 unsigned ClipOpc;
16283 if ((Val = DetectUSatPattern(Src)))
16284 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
16285 else if ((Val = DetectSSatPattern(Src)))
16286 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
16287 else
16288 return SDValue();
16289
16290 MVT ValVT = Val.getSimpleValueType();
16291
16292 do {
16293 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
16294 ValVT = ValVT.changeVectorElementType(ValEltVT);
16295 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
16296 } while (ValVT != VT);
16297
16298 return Val;
16299}
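// Illustrative example (a sketch, not from the upstream source), truncating
// i16 elements to i8:
//   (truncate_vector_vl (umin X, 255)) -> one vnclipu
//   (truncate_vector_vl (smin (smax X, -128), 127)) -> one vnclip
// For larger ratios such as i32 -> i8, the do/while loop above emits one clip
// per SEW halving step.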
16300
16301SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
16302 DAGCombinerInfo &DCI) const {
16303 SelectionDAG &DAG = DCI.DAG;
16304 const MVT XLenVT = Subtarget.getXLenVT();
16305 SDLoc DL(N);
16306
16307 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16308 // bits are demanded. N will be added to the Worklist if it was not deleted.
16309 // Caller should return SDValue(N, 0) if this returns true.
16310 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16311 SDValue Op = N->getOperand(OpNo);
16312 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16313 if (!SimplifyDemandedBits(Op, Mask, DCI))
16314 return false;
16315
16316 if (N->getOpcode() != ISD::DELETED_NODE)
16317 DCI.AddToWorklist(N);
16318 return true;
16319 };
16320
16321 switch (N->getOpcode()) {
16322 default:
16323 break;
16324 case RISCVISD::SplitF64: {
16325 SDValue Op0 = N->getOperand(0);
16326 // If the input to SplitF64 is just BuildPairF64 then the operation is
16327 // redundant. Instead, use BuildPairF64's operands directly.
16328 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16329 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16330
16331 if (Op0->isUndef()) {
16332 SDValue Lo = DAG.getUNDEF(MVT::i32);
16333 SDValue Hi = DAG.getUNDEF(MVT::i32);
16334 return DCI.CombineTo(N, Lo, Hi);
16335 }
16336
16337 // It's cheaper to materialise two 32-bit integers than to load a double
16338 // from the constant pool and transfer it to integer registers through the
16339 // stack.
16340 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16341 APInt V = C->getValueAPF().bitcastToAPInt();
16342 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16343 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16344 return DCI.CombineTo(N, Lo, Hi);
16345 }
16346
16347 // This is a target-specific version of a DAGCombine performed in
16348 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16349 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16350 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16351 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16352 !Op0.getNode()->hasOneUse())
16353 break;
16354 SDValue NewSplitF64 =
16355 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16356 Op0.getOperand(0));
16357 SDValue Lo = NewSplitF64.getValue(0);
16358 SDValue Hi = NewSplitF64.getValue(1);
16359 APInt SignBit = APInt::getSignMask(32);
16360 if (Op0.getOpcode() == ISD::FNEG) {
16361 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16362 DAG.getConstant(SignBit, DL, MVT::i32));
16363 return DCI.CombineTo(N, Lo, NewHi);
16364 }
16365 assert(Op0.getOpcode() == ISD::FABS);
16366 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16367 DAG.getConstant(~SignBit, DL, MVT::i32));
16368 return DCI.CombineTo(N, Lo, NewHi);
16369 }
16370 case RISCVISD::SLLW:
16371 case RISCVISD::SRAW:
16372 case RISCVISD::SRLW:
16373 case RISCVISD::RORW:
16374 case RISCVISD::ROLW: {
16375 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16376 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16377 SimplifyDemandedLowBitsHelper(1, 5))
16378 return SDValue(N, 0);
16379
16380 break;
16381 }
16382 case RISCVISD::CLZW:
16383 case RISCVISD::CTZW: {
16384 // Only the lower 32 bits of the first operand are read
16385 if (SimplifyDemandedLowBitsHelper(0, 32))
16386 return SDValue(N, 0);
16387 break;
16388 }
16389 case RISCVISD::FMV_W_X_RV64: {
16390 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16391 // conversion is unnecessary and can be replaced with the
16392 // FMV_X_ANYEXTW_RV64 operand.
16393 SDValue Op0 = N->getOperand(0);
16394 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
16395 return Op0.getOperand(0);
16396 break;
16397 }
16398 case RISCVISD::FMV_X_ANYEXTH:
16399 case RISCVISD::FMV_X_ANYEXTW_RV64: {
16400 SDLoc DL(N);
16401 SDValue Op0 = N->getOperand(0);
16402 MVT VT = N->getSimpleValueType(0);
16403 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16404 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16405 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16406 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16407 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16408 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16409 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16410 assert(Op0.getOperand(0).getValueType() == VT &&
16411 "Unexpected value type!");
16412 return Op0.getOperand(0);
16413 }
16414
16415 // This is a target-specific version of a DAGCombine performed in
16416 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16417 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16418 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16419 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16420 !Op0.getNode()->hasOneUse())
16421 break;
16422 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16423 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16424 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16425 if (Op0.getOpcode() == ISD::FNEG)
16426 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16427 DAG.getConstant(SignBit, DL, VT));
16428
16429 assert(Op0.getOpcode() == ISD::FABS);
16430 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16431 DAG.getConstant(~SignBit, DL, VT));
16432 }
16433 case ISD::ABS: {
16434 EVT VT = N->getValueType(0);
16435 SDValue N0 = N->getOperand(0);
16436 // abs (sext) -> zext (abs)
16437 // abs (zext) -> zext (handled elsewhere)
16438 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16439 SDValue Src = N0.getOperand(0);
16440 SDLoc DL(N);
16441 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16442 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16443 }
16444 break;
16445 }
16446 case ISD::ADD: {
16447 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16448 return V;
16449 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16450 return V;
16451 return performADDCombine(N, DCI, Subtarget);
16452 }
16453 case ISD::SUB: {
16454 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16455 return V;
16456 return performSUBCombine(N, DAG, Subtarget);
16457 }
16458 case ISD::AND:
16459 return performANDCombine(N, DCI, Subtarget);
16460 case ISD::OR: {
16461 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16462 return V;
16463 return performORCombine(N, DCI, Subtarget);
16464 }
16465 case ISD::XOR:
16466 return performXORCombine(N, DAG, Subtarget);
16467 case ISD::MUL:
16468 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16469 return V;
16470 return performMULCombine(N, DAG, DCI, Subtarget);
16471 case ISD::SDIV:
16472 case ISD::UDIV:
16473 case ISD::SREM:
16474 case ISD::UREM:
16475 if (SDValue V = combineBinOpOfZExt(N, DAG))
16476 return V;
16477 break;
16478 case ISD::FMUL: {
16479 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
16480 SDValue N0 = N->getOperand(0);
16481 SDValue N1 = N->getOperand(1);
16482 if (N0->getOpcode() != ISD::FCOPYSIGN)
16483 std::swap(N0, N1);
16484 if (N0->getOpcode() != ISD::FCOPYSIGN)
16485 return SDValue();
16486 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
16487 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
16488 return SDValue();
16489 EVT VT = N->getValueType(0);
16490 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
16491 return SDValue();
16492 SDValue Sign = N0->getOperand(1);
16493 if (Sign.getValueType() != VT)
16494 return SDValue();
16495 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
16496 }
16497 case ISD::FADD:
16498 case ISD::UMAX:
16499 case ISD::UMIN:
16500 case ISD::SMAX:
16501 case ISD::SMIN:
16502 case ISD::FMAXNUM:
16503 case ISD::FMINNUM: {
16504 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16505 return V;
16506 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16507 return V;
16508 return SDValue();
16509 }
16510 case ISD::SETCC:
16511 return performSETCCCombine(N, DAG, Subtarget);
16512 case ISD::SIGN_EXTEND_INREG:
16513 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16514 case ISD::ZERO_EXTEND:
16515 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16516 // type legalization. This is safe because fp_to_uint produces poison if
16517 // it overflows.
16518 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16519 SDValue Src = N->getOperand(0);
16520 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16521 isTypeLegal(Src.getOperand(0).getValueType()))
16522 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16523 Src.getOperand(0));
16524 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16525 isTypeLegal(Src.getOperand(1).getValueType())) {
16526 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16527 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16528 Src.getOperand(0), Src.getOperand(1));
16529 DCI.CombineTo(N, Res);
16530 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16531 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16532 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16533 }
16534 }
16535 return SDValue();
16536 case RISCVISD::TRUNCATE_VECTOR_VL:
16537 if (SDValue V = combineTruncOfSraSext(N, DAG))
16538 return V;
16539 return combineTruncToVnclip(N, DAG, Subtarget);
16540 case ISD::TRUNCATE:
16541 return performTRUNCATECombine(N, DAG, Subtarget);
16542 case ISD::SELECT:
16543 return performSELECTCombine(N, DAG, Subtarget);
16544 case RISCVISD::CZERO_EQZ:
16545 case RISCVISD::CZERO_NEZ: {
16546 SDValue Val = N->getOperand(0);
16547 SDValue Cond = N->getOperand(1);
16548
16549 unsigned Opc = N->getOpcode();
16550
16551 // czero_eqz x, x -> x
16552 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16553 return Val;
16554
16555 unsigned InvOpc =
16556 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16557
16558 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16559 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16560 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16561 SDValue NewCond = Cond.getOperand(0);
16562 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16563 if (DAG.MaskedValueIsZero(NewCond, Mask))
16564 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16565 }
16566 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16567 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16568 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16569 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16570 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16571 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16572 if (ISD::isIntEqualitySetCC(CCVal))
16573 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16574 N->getValueType(0), Val, Cond.getOperand(0));
16575 }
16576 return SDValue();
16577 }
16578 case RISCVISD::SELECT_CC: {
16579 // Transform
16580 SDValue LHS = N->getOperand(0);
16581 SDValue RHS = N->getOperand(1);
16582 SDValue CC = N->getOperand(2);
16583 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16584 SDValue TrueV = N->getOperand(3);
16585 SDValue FalseV = N->getOperand(4);
16586 SDLoc DL(N);
16587 EVT VT = N->getValueType(0);
16588
16589 // If the True and False values are the same, we don't need a select_cc.
16590 if (TrueV == FalseV)
16591 return TrueV;
16592
16593 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16594 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
16595 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16596 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16597 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16598 if (CCVal == ISD::CondCode::SETGE)
16599 std::swap(TrueV, FalseV);
16600
16601 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16602 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16603 // Only handle simm12; if the constant is not in this range, it can be
16604 // considered as a register operand.
16605 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16606 isInt<12>(TrueSImm - FalseSImm)) {
16607 SDValue SRA =
16608 DAG.getNode(ISD::SRA, DL, VT, LHS,
16609 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16610 SDValue AND =
16611 DAG.getNode(ISD::AND, DL, VT, SRA,
16612 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
16613 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16614 }
16615
16616 if (CCVal == ISD::CondCode::SETGE)
16617 std::swap(TrueV, FalseV);
16618 }
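// Illustrative example of the branchless expansion above (a sketch, not
// from the upstream source), for (select_cc x, 0, setlt, 3, 5) on RV64:
//   sra  t, x, 63      ; all ones if x < 0, otherwise zero
//   and  t, t, -2      ; TrueSImm - FalseSImm = 3 - 5
//   add  res, t, 5     ; gives 3 when x < 0 and 5 otherwise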
16619
16620 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16621 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16622 {LHS, RHS, CC, TrueV, FalseV});
16623
16624 if (!Subtarget.hasConditionalMoveFusion()) {
16625 // (select c, -1, y) -> -c | y
16626 if (isAllOnesConstant(TrueV)) {
16627 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16628 SDValue Neg = DAG.getNegative(C, DL, VT);
16629 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16630 }
16631 // (select c, y, -1) -> -!c | y
16632 if (isAllOnesConstant(FalseV)) {
16633 SDValue C =
16634 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16635 SDValue Neg = DAG.getNegative(C, DL, VT);
16636 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16637 }
16638
16639 // (select c, 0, y) -> -!c & y
16640 if (isNullConstant(TrueV)) {
16641 SDValue C =
16642 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16643 SDValue Neg = DAG.getNegative(C, DL, VT);
16644 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16645 }
16646 // (select c, y, 0) -> -c & y
16647 if (isNullConstant(FalseV)) {
16648 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16649 SDValue Neg = DAG.getNegative(C, DL, VT);
16650 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16651 }
16652 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16653 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16654 if (((isOneConstant(FalseV) && LHS == TrueV &&
16655 CCVal == ISD::CondCode::SETNE) ||
16656 (isOneConstant(TrueV) && LHS == FalseV &&
16657 CCVal == ISD::CondCode::SETEQ)) &&
16658 isNullConstant(RHS)) {
16659 // freeze it to be safe.
16660 LHS = DAG.getFreeze(LHS);
16661 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16662 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16663 }
16664 }
16665
16666 // If both true/false are an xor with 1, pull through the select.
16667 // This can occur after op legalization if both operands are setccs that
16668 // require an xor to invert.
16669 // FIXME: Generalize to other binary ops with identical operand?
16670 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16671 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16672 isOneConstant(TrueV.getOperand(1)) &&
16673 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16674 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16675 TrueV.getOperand(0), FalseV.getOperand(0));
16676 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16677 }
16678
16679 return SDValue();
16680 }
16681 case RISCVISD::BR_CC: {
16682 SDValue LHS = N->getOperand(1);
16683 SDValue RHS = N->getOperand(2);
16684 SDValue CC = N->getOperand(3);
16685 SDLoc DL(N);
16686
16687 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16688 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16689 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16690
16691 return SDValue();
16692 }
16693 case ISD::BITREVERSE:
16694 return performBITREVERSECombine(N, DAG, Subtarget);
16695 case ISD::FP_TO_SINT:
16696 case ISD::FP_TO_UINT:
16697 return performFP_TO_INTCombine(N, DCI, Subtarget);
16698 case ISD::FP_TO_SINT_SAT:
16699 case ISD::FP_TO_UINT_SAT:
16700 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16701 case ISD::FCOPYSIGN: {
16702 EVT VT = N->getValueType(0);
16703 if (!VT.isVector())
16704 break;
16705 // There is a form of VFSGNJ which injects the negated sign of its second
16706 // operand. Try and bubble any FNEG up after the extend/round to produce
16707 // this optimized pattern. Avoid modifying cases where FP_ROUND and
16708 // TRUNC=1.
16709 SDValue In2 = N->getOperand(1);
16710 // Avoid cases where the extend/round has multiple uses, as duplicating
16711 // those is typically more expensive than removing a fneg.
16712 if (!In2.hasOneUse())
16713 break;
16714 if (In2.getOpcode() != ISD::FP_EXTEND &&
16715 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16716 break;
16717 In2 = In2.getOperand(0);
16718 if (In2.getOpcode() != ISD::FNEG)
16719 break;
16720 SDLoc DL(N);
16721 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16722 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16723 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16724 }
16725 case ISD::MGATHER: {
16726 const auto *MGN = cast<MaskedGatherSDNode>(N);
16727 const EVT VT = N->getValueType(0);
16728 SDValue Index = MGN->getIndex();
16729 SDValue ScaleOp = MGN->getScale();
16730 ISD::MemIndexType IndexType = MGN->getIndexType();
16731 assert(!MGN->isIndexScaled() &&
16732 "Scaled gather/scatter should not be formed");
16733
16734 SDLoc DL(N);
16735 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16736 return DAG.getMaskedGather(
16737 N->getVTList(), MGN->getMemoryVT(), DL,
16738 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16739 MGN->getBasePtr(), Index, ScaleOp},
16740 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16741
16742 if (narrowIndex(Index, IndexType, DAG))
16743 return DAG.getMaskedGather(
16744 N->getVTList(), MGN->getMemoryVT(), DL,
16745 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16746 MGN->getBasePtr(), Index, ScaleOp},
16747 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16748
16749 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16750 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16751 // The sequence will be XLenVT, not the type of Index. Tell
16752 // isSimpleVIDSequence this so we avoid overflow.
16753 if (std::optional<VIDSequence> SimpleVID =
16754 isSimpleVIDSequence(Index, Subtarget.getXLen());
16755 SimpleVID && SimpleVID->StepDenominator == 1) {
16756 const int64_t StepNumerator = SimpleVID->StepNumerator;
16757 const int64_t Addend = SimpleVID->Addend;
16758
16759 // Note: We don't need to check alignment here since (by assumption
16760 // from the existence of the gather), our offsets must be sufficiently
16761 // aligned.
16762
16763 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16764 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16765 assert(IndexType == ISD::UNSIGNED_SCALED);
16766 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16767 DAG.getSignedConstant(Addend, DL, PtrVT));
16768
16769 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
16770 VT.getVectorElementCount());
16771 SDValue StridedLoad = DAG.getStridedLoadVP(
16772 VT, DL, MGN->getChain(), BasePtr,
16773 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
16774 EVL, MGN->getMemOperand());
16775 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
16776 StridedLoad, MGN->getPassThru(), EVL);
16777 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
16778 DL);
16779 }
16780 }
16781
16782 SmallVector<int> ShuffleMask;
16783 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16784 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16785 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16786 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16787 MGN->getMask(), DAG.getUNDEF(VT),
16788 MGN->getMemoryVT(), MGN->getMemOperand(),
16789 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16790 SDValue Shuffle =
16791 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16792 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16793 }
16794
16795 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16796 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16797 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16798 SmallVector<SDValue> NewIndices;
16799 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16800 NewIndices.push_back(Index.getOperand(i));
16801 EVT IndexVT = Index.getValueType()
16802 .getHalfNumVectorElementsVT(*DAG.getContext());
16803 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16804
16805 unsigned ElementSize = VT.getScalarStoreSize();
16806 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16807 auto EltCnt = VT.getVectorElementCount();
16808 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16809 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16810 EltCnt.divideCoefficientBy(2));
16811 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16812 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16813 EltCnt.divideCoefficientBy(2));
16814 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16815
16816 SDValue Gather =
16817 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16818 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16819 Index, ScaleOp},
16820 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16821 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16822 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16823 }
16824 break;
16825 }
16826 case ISD::MSCATTER:{
16827 const auto *MSN = cast<MaskedScatterSDNode>(N);
16828 SDValue Index = MSN->getIndex();
16829 SDValue ScaleOp = MSN->getScale();
16830 ISD::MemIndexType IndexType = MSN->getIndexType();
16831 assert(!MSN->isIndexScaled() &&
16832 "Scaled gather/scatter should not be formed");
16833
16834 SDLoc DL(N);
16835 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16836 return DAG.getMaskedScatter(
16837 N->getVTList(), MSN->getMemoryVT(), DL,
16838 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16839 Index, ScaleOp},
16840 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16841
16842 if (narrowIndex(Index, IndexType, DAG))
16843 return DAG.getMaskedScatter(
16844 N->getVTList(), MSN->getMemoryVT(), DL,
16845 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16846 Index, ScaleOp},
16847 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16848
16849 EVT VT = MSN->getValue()->getValueType(0);
16850 SmallVector<int> ShuffleMask;
16851 if (!MSN->isTruncatingStore() &&
16852 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16853 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16854 DAG.getUNDEF(VT), ShuffleMask);
16855 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16856 DAG.getUNDEF(XLenVT), MSN->getMask(),
16857 MSN->getMemoryVT(), MSN->getMemOperand(),
16858 ISD::UNINDEXED, false);
16859 }
16860 break;
16861 }
16862 case ISD::VP_GATHER: {
16863 const auto *VPGN = cast<VPGatherSDNode>(N);
16864 SDValue Index = VPGN->getIndex();
16865 SDValue ScaleOp = VPGN->getScale();
16866 ISD::MemIndexType IndexType = VPGN->getIndexType();
16867 assert(!VPGN->isIndexScaled() &&
16868 "Scaled gather/scatter should not be formed");
16869
16870 SDLoc DL(N);
16871 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16872 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16873 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16874 ScaleOp, VPGN->getMask(),
16875 VPGN->getVectorLength()},
16876 VPGN->getMemOperand(), IndexType);
16877
16878 if (narrowIndex(Index, IndexType, DAG))
16879 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16880 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16881 ScaleOp, VPGN->getMask(),
16882 VPGN->getVectorLength()},
16883 VPGN->getMemOperand(), IndexType);
16884
16885 break;
16886 }
16887 case ISD::VP_SCATTER: {
16888 const auto *VPSN = cast<VPScatterSDNode>(N);
16889 SDValue Index = VPSN->getIndex();
16890 SDValue ScaleOp = VPSN->getScale();
16891 ISD::MemIndexType IndexType = VPSN->getIndexType();
16892 assert(!VPSN->isIndexScaled() &&
16893 "Scaled gather/scatter should not be formed");
16894
16895 SDLoc DL(N);
16896 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16897 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16898 {VPSN->getChain(), VPSN->getValue(),
16899 VPSN->getBasePtr(), Index, ScaleOp,
16900 VPSN->getMask(), VPSN->getVectorLength()},
16901 VPSN->getMemOperand(), IndexType);
16902
16903 if (narrowIndex(Index, IndexType, DAG))
16904 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16905 {VPSN->getChain(), VPSN->getValue(),
16906 VPSN->getBasePtr(), Index, ScaleOp,
16907 VPSN->getMask(), VPSN->getVectorLength()},
16908 VPSN->getMemOperand(), IndexType);
16909 break;
16910 }
16911 case RISCVISD::SHL_VL:
16912 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16913 return V;
16914 [[fallthrough]];
16915 case RISCVISD::SRA_VL:
16916 case RISCVISD::SRL_VL: {
16917 SDValue ShAmt = N->getOperand(1);
16918 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16919 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16920 SDLoc DL(N);
16921 SDValue VL = N->getOperand(4);
16922 EVT VT = N->getValueType(0);
16923 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16924 ShAmt.getOperand(1), VL);
16925 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16926 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16927 }
16928 break;
16929 }
16930 case ISD::SRA:
16931 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16932 return V;
16933 [[fallthrough]];
16934 case ISD::SRL:
16935 case ISD::SHL: {
16936 if (N->getOpcode() == ISD::SHL) {
16937 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16938 return V;
16939 }
16940 SDValue ShAmt = N->getOperand(1);
16941 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16942 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16943 SDLoc DL(N);
16944 EVT VT = N->getValueType(0);
16945 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16946 ShAmt.getOperand(1),
16947 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16948 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16949 }
16950 break;
16951 }
16952 case RISCVISD::ADD_VL:
16953 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
16954 return V;
16955 return combineToVWMACC(N, DAG, Subtarget);
16956 case RISCVISD::VWADD_W_VL:
16957 case RISCVISD::VWADDU_W_VL:
16958 case RISCVISD::VWSUB_W_VL:
16959 case RISCVISD::VWSUBU_W_VL:
16960 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16961 case RISCVISD::SUB_VL:
16962 case RISCVISD::MUL_VL:
16963 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16964 case RISCVISD::VFMADD_VL:
16965 case RISCVISD::VFNMADD_VL:
16966 case RISCVISD::VFMSUB_VL:
16967 case RISCVISD::VFNMSUB_VL:
16968 case RISCVISD::STRICT_VFMADD_VL:
16969 case RISCVISD::STRICT_VFNMADD_VL:
16970 case RISCVISD::STRICT_VFMSUB_VL:
16971 case RISCVISD::STRICT_VFNMSUB_VL:
16972 return performVFMADD_VLCombine(N, DCI, Subtarget);
16973 case RISCVISD::FADD_VL:
16974 case RISCVISD::FSUB_VL:
16975 case RISCVISD::FMUL_VL:
16976 case RISCVISD::VFWADD_W_VL:
16977 case RISCVISD::VFWSUB_W_VL: {
16978 if (N->getValueType(0).getVectorElementType() == MVT::f32 &&
16979 !Subtarget.hasVInstructionsF16())
16980 return SDValue();
16981 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16982 }
16983 case ISD::LOAD:
16984 case ISD::STORE: {
16985 if (DCI.isAfterLegalizeDAG())
16986 if (SDValue V = performMemPairCombine(N, DCI))
16987 return V;
16988
16989 if (N->getOpcode() != ISD::STORE)
16990 break;
16991
16992 auto *Store = cast<StoreSDNode>(N);
16993 SDValue Chain = Store->getChain();
16994 EVT MemVT = Store->getMemoryVT();
16995 SDValue Val = Store->getValue();
16996 SDLoc DL(N);
16997
16998 bool IsScalarizable =
16999 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
17000 Store->isSimple() &&
17001 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
17002 isPowerOf2_64(MemVT.getSizeInBits()) &&
17003 MemVT.getSizeInBits() <= Subtarget.getXLen();
17004
17005 // If sufficiently aligned we can scalarize stores of constant vectors of
17006 // any power-of-two size up to XLen bits, provided that they aren't too
17007 // expensive to materialize.
17008 // vsetivli zero, 2, e8, m1, ta, ma
17009 // vmv.v.i v8, 4
17010 // vse64.v v8, (a0)
17011 // ->
17012 // li a1, 1028
17013 // sh a1, 0(a0)
17014 if (DCI.isBeforeLegalize() && IsScalarizable &&
17015 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
17016 // Get the constant vector bits
17017 APInt NewC(Val.getValueSizeInBits(), 0);
17018 uint64_t EltSize = Val.getScalarValueSizeInBits();
17019 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
17020 if (Val.getOperand(i).isUndef())
17021 continue;
17022 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
17023 i * EltSize);
17024 }
17025 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17026
17027 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
17028 true) <= 2 &&
17029 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17030 NewVT, *Store->getMemOperand())) {
17031 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
17032 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
17033 Store->getPointerInfo(), Store->getOriginalAlign(),
17034 Store->getMemOperand()->getFlags());
17035 }
17036 }
17037
17038 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
17039 // vsetivli zero, 2, e16, m1, ta, ma
17040 // vle16.v v8, (a0)
17041 // vse16.v v8, (a1)
17042 if (auto *L = dyn_cast<LoadSDNode>(Val);
17043 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
17044 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
17045 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
17046 L->getMemoryVT() == MemVT) {
17047 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17048 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17049 NewVT, *Store->getMemOperand()) &&
17050 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
17051 NewVT, *L->getMemOperand())) {
17052 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
17053 L->getPointerInfo(), L->getOriginalAlign(),
17054 L->getMemOperand()->getFlags());
17055 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
17056 Store->getPointerInfo(), Store->getOriginalAlign(),
17057 Store->getMemOperand()->getFlags());
17058 }
17059 }
17060
17061 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
17062 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
17063 // any illegal types.
17064 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
17065 (DCI.isAfterLegalizeDAG() &&
17066 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17067 isNullConstant(Val.getOperand(1)))) {
17068 SDValue Src = Val.getOperand(0);
17069 MVT VecVT = Src.getSimpleValueType();
17070 // VecVT should be scalable and memory VT should match the element type.
17071 if (!Store->isIndexed() && VecVT.isScalableVector() &&
17072 MemVT == VecVT.getVectorElementType()) {
17073 SDLoc DL(N);
17074 MVT MaskVT = getMaskTypeFor(VecVT);
17075 return DAG.getStoreVP(
17076 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
17077 DAG.getConstant(1, DL, MaskVT),
17078 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
17079 Store->getMemOperand(), Store->getAddressingMode(),
17080 Store->isTruncatingStore(), /*IsCompress*/ false);
17081 }
17082 }
17083
17084 break;
17085 }
17086 case ISD::SPLAT_VECTOR: {
17087 EVT VT = N->getValueType(0);
17088 // Only perform this combine on legal MVT types.
17089 if (!isTypeLegal(VT))
17090 break;
17091 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
17092 DAG, Subtarget))
17093 return Gather;
17094 break;
17095 }
17096 case ISD::BUILD_VECTOR:
17097 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
17098 return V;
17099 break;
17100 case ISD::CONCAT_VECTORS:
17101 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
17102 return V;
17103 break;
17104 case ISD::INSERT_VECTOR_ELT:
17105 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
17106 return V;
17107 break;
17108 case RISCVISD::VFMV_V_F_VL: {
17109 const MVT VT = N->getSimpleValueType(0);
17110 SDValue Passthru = N->getOperand(0);
17111 SDValue Scalar = N->getOperand(1);
17112 SDValue VL = N->getOperand(2);
17113
17114 // If VL is 1, we can use vfmv.s.f.
17115 if (isOneConstant(VL))
17116 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
17117 break;
17118 }
17119 case RISCVISD::VMV_V_X_VL: {
17120 const MVT VT = N->getSimpleValueType(0);
17121 SDValue Passthru = N->getOperand(0);
17122 SDValue Scalar = N->getOperand(1);
17123 SDValue VL = N->getOperand(2);
17124
17125 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
17126 // scalar input.
17127 unsigned ScalarSize = Scalar.getValueSizeInBits();
17128 unsigned EltWidth = VT.getScalarSizeInBits();
17129 if (ScalarSize > EltWidth && Passthru.isUndef())
17130 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
17131 return SDValue(N, 0);
17132
17133 // If VL is 1 and the scalar value won't benefit from immediate, we can
17134 // use vmv.s.x.
17135 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17136 if (isOneConstant(VL) &&
17137 (!Const || Const->isZero() ||
17138 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
17139 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
17140
17141 break;
17142 }
17143 case RISCVISD::VFMV_S_F_VL: {
17144 SDValue Src = N->getOperand(1);
17145 // Try to remove vector->scalar->vector if the scalar->vector is inserting
17146 // into an undef vector.
17147 // TODO: Could use a vslide or vmv.v.v for non-undef.
17148 if (N->getOperand(0).isUndef() &&
17149 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17150 isNullConstant(Src.getOperand(1)) &&
17151 Src.getOperand(0).getValueType().isScalableVector()) {
17152 EVT VT = N->getValueType(0);
17153 EVT SrcVT = Src.getOperand(0).getValueType();
17154 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
17155 // Widths match, just return the original vector.
17156 if (SrcVT == VT)
17157 return Src.getOperand(0);
17158 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
17159 }
17160 [[fallthrough]];
17161 }
17162 case RISCVISD::VMV_S_X_VL: {
17163 const MVT VT = N->getSimpleValueType(0);
17164 SDValue Passthru = N->getOperand(0);
17165 SDValue Scalar = N->getOperand(1);
17166 SDValue VL = N->getOperand(2);
17167
17168 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
17169 Scalar.getOperand(0).getValueType() == N->getValueType(0))
17170 return Scalar.getOperand(0);
17171
17172 // Use M1 or smaller to avoid over constraining register allocation
17173 const MVT M1VT = getLMUL1VT(VT);
17174 if (M1VT.bitsLT(VT)) {
17175 SDValue M1Passthru =
17176 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
17177 DAG.getVectorIdxConstant(0, DL));
17178 SDValue Result =
17179 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
17180 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
17181 DAG.getVectorIdxConstant(0, DL));
17182 return Result;
17183 }
17184
17185 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
17186 // higher would involve overly constraining the register allocator for
17187 // no purpose.
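// E.g. (illustrative) inserting the constant 5 at VL=1 into an undef m1
// vector can then be selected as vmv.v.i v8, 5 rather than first
// materialising 5 in a scalar register for vmv.s.x.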
17188 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17189 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17190 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17191 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17192
17193 break;
17194 }
17195 case RISCVISD::VMV_X_S: {
17196 SDValue Vec = N->getOperand(0);
17197 MVT VecVT = N->getOperand(0).getSimpleValueType();
17198 const MVT M1VT = getLMUL1VT(VecVT);
17199 if (M1VT.bitsLT(VecVT)) {
17200 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17201 DAG.getVectorIdxConstant(0, DL));
17202 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17203 }
17204 break;
17205 }
17206 case ISD::INTRINSIC_VOID:
17207 case ISD::INTRINSIC_W_CHAIN:
17208 case ISD::INTRINSIC_WO_CHAIN: {
17209 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17210 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17211 switch (IntNo) {
17212 // By default we do not combine any intrinsic.
17213 default:
17214 return SDValue();
17215 case Intrinsic::riscv_vcpop:
17216 case Intrinsic::riscv_vcpop_mask:
17217 case Intrinsic::riscv_vfirst:
17218 case Intrinsic::riscv_vfirst_mask: {
17219 SDValue VL = N->getOperand(2);
17220 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17221 IntNo == Intrinsic::riscv_vfirst_mask)
17222 VL = N->getOperand(3);
17223 if (!isNullConstant(VL))
17224 return SDValue();
17225 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17226 SDLoc DL(N);
17227 EVT VT = N->getValueType(0);
17228 if (IntNo == Intrinsic::riscv_vfirst ||
17229 IntNo == Intrinsic::riscv_vfirst_mask)
17230 return DAG.getAllOnesConstant(DL, VT);
17231 return DAG.getConstant(0, DL, VT);
17232 }
17233 }
17234 }
17235 case ISD::BITCAST: {
17236 assert(Subtarget.useRVVForFixedLengthVectors());
17237 SDValue N0 = N->getOperand(0);
17238 EVT VT = N->getValueType(0);
17239 EVT SrcVT = N0.getValueType();
17240 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17241 // type, widen both sides to avoid a trip through memory.
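// E.g. (illustrative) a v2i1 -> i2 bitcast becomes: concatenate the v2i1
// with undefs up to v8i1, bitcast that to i8, then truncate the i8 to i2.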
17242 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17243 VT.isScalarInteger()) {
17244 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17245 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17246 Ops[0] = N0;
17247 SDLoc DL(N);
17248 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17249 N0 = DAG.getBitcast(MVT::i8, N0);
17250 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17251 }
17252
17253 return SDValue();
17254 }
17255 }
17256
17257 return SDValue();
17258}
17259
17260 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
17261 EVT XVT, unsigned KeptBits) const {
17262 // For vectors, we don't have a preference.
17263 if (XVT.isVector())
17264 return false;
17265
17266 if (XVT != MVT::i32 && XVT != MVT::i64)
17267 return false;
17268
17269 // We can use sext.w for RV64 or an srai 31 on RV32.
17270 if (KeptBits == 32 || KeptBits == 64)
17271 return true;
17272
17273 // With Zbb we can use sext.h/sext.b.
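// (Roughly: a "does this value fit in N bits?" check in its add-and-
// unsigned-compare form is turned back into a sign-extend-and-compare, which
// Zbb can do with a single sext.h or sext.b.)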
17274 return Subtarget.hasStdExtZbb() &&
17275 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17276 KeptBits == 16);
17277}
17278
17279 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
17280 const SDNode *N, CombineLevel Level) const {
17281 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17282 N->getOpcode() == ISD::SRL) &&
17283 "Expected shift op");
17284
17285 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17286 // materialised in fewer instructions than `(OP _, c1)`:
17287 //
17288 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17289 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
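// For example (illustrative): with c1 = 37 and c2 = 2, c1 << c2 == 148 still
// fits in an addi immediate, so the fold is allowed; with c1 = 2047 and
// c2 = 4, c1 << c2 == 32752 does not while 2047 itself does, so it is blocked.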
17290 SDValue N0 = N->getOperand(0);
17291 EVT Ty = N0.getValueType();
17292 if (Ty.isScalarInteger() &&
17293 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17294 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17295 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17296 if (C1 && C2) {
17297 const APInt &C1Int = C1->getAPIntValue();
17298 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17299
17300 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17301 // and the combine should happen, to potentially allow further combines
17302 // later.
17303 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17304 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17305 return true;
17306
17307 // We can materialise `c1` in an add immediate, so it's "free", and the
17308 // combine should be prevented.
17309 if (C1Int.getSignificantBits() <= 64 &&
17310 isLegalAddImmediate(C1Int.getSExtValue()))
17311 return false;
17312
17313 // Neither constant will fit into an immediate, so find materialisation
17314 // costs.
17315 int C1Cost =
17316 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17317 /*CompressionCost*/ true);
17318 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17319 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17320 /*CompressionCost*/ true);
17321
17322 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17323 // combine should be prevented.
17324 if (C1Cost < ShiftedC1Cost)
17325 return false;
17326 }
17327 }
17328 return true;
17329}
17330
17331 bool RISCVTargetLowering::targetShrinkDemandedConstant(
17332 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17333 TargetLoweringOpt &TLO) const {
17334 // Delay this optimization as late as possible.
17335 if (!TLO.LegalOps)
17336 return false;
17337
17338 EVT VT = Op.getValueType();
17339 if (VT.isVector())
17340 return false;
17341
17342 unsigned Opcode = Op.getOpcode();
17343 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17344 return false;
17345
17346 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17347 if (!C)
17348 return false;
17349
17350 const APInt &Mask = C->getAPIntValue();
17351
17352 // Clear all non-demanded bits initially.
17353 APInt ShrunkMask = Mask & DemandedBits;
17354
17355 // Try to make a smaller immediate by setting undemanded bits.
17356
17357 APInt ExpandedMask = Mask | ~DemandedBits;
17358
17359 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17360 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17361 };
17362 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17363 if (NewMask == Mask)
17364 return true;
17365 SDLoc DL(Op);
17366 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17367 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17368 Op.getOperand(0), NewC);
17369 return TLO.CombineTo(Op, NewOp);
17370 };
17371
17372 // If the shrunk mask fits in sign extended 12 bits, let the target
17373 // independent code apply it.
17374 if (ShrunkMask.isSignedIntN(12))
17375 return false;
17376
17377 // And has a few special cases for zext.
17378 if (Opcode == ISD::AND) {
17379 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17380 // otherwise use SLLI + SRLI.
17381 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17382 if (IsLegalMask(NewMask))
17383 return UseMask(NewMask);
17384
17385 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
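// (With Zba this can be selected as a single zext.w, i.e. add.uw with x0;
// without Zba it is still only SLLI+SRLI, which beats materialising the
// 0xffffffff constant.)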
17386 if (VT == MVT::i64) {
17387 APInt NewMask = APInt(64, 0xffffffff);
17388 if (IsLegalMask(NewMask))
17389 return UseMask(NewMask);
17390 }
17391 }
17392
17393 // For the remaining optimizations, we need to be able to make a negative
17394 // number through a combination of mask and undemanded bits.
17395 if (!ExpandedMask.isNegative())
17396 return false;
17397
17398 // The fewest number of bits needed to represent the negative number.
17399 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17400
17401 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17402 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17403 // If we can't create a simm12, we shouldn't change opaque constants.
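// For example (illustrative): an AND with 0x800 whose bits above bit 11 are
// all undemanded can instead use the mask 0xfff...f800 (-2048), which andi
// encodes directly.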
17404 APInt NewMask = ShrunkMask;
17405 if (MinSignedBits <= 12)
17406 NewMask.setBitsFrom(11);
17407 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17408 NewMask.setBitsFrom(31);
17409 else
17410 return false;
17411
17412 // Check that our new mask is a subset of the demanded mask.
17413 assert(IsLegalMask(NewMask));
17414 return UseMask(NewMask);
17415}
17416
17417static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17418 static const uint64_t GREVMasks[] = {
17419 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17420 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
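// With ShAmt == 7 only the three byte-local stages run, so e.g.
// (illustrative) computeGREVOrGORC(0x01, 7, /*IsGORC=*/true) == 0xff,
// matching orc.b, while with IsGORC == false it is 0x80, matching brev8.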
17421
17422 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17423 unsigned Shift = 1 << Stage;
17424 if (ShAmt & Shift) {
17425 uint64_t Mask = GREVMasks[Stage];
17426 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17427 if (IsGORC)
17428 Res |= x;
17429 x = Res;
17430 }
17431 }
17432
17433 return x;
17434}
17435
17436 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17437 KnownBits &Known,
17438 const APInt &DemandedElts,
17439 const SelectionDAG &DAG,
17440 unsigned Depth) const {
17441 unsigned BitWidth = Known.getBitWidth();
17442 unsigned Opc = Op.getOpcode();
17443 assert((Opc >= ISD::BUILTIN_OP_END ||
17444 Opc == ISD::INTRINSIC_WO_CHAIN ||
17445 Opc == ISD::INTRINSIC_W_CHAIN ||
17446 Opc == ISD::INTRINSIC_VOID) &&
17447 "Should use MaskedValueIsZero if you don't know whether Op"
17448 " is a target node!");
17449
17450 Known.resetAll();
17451 switch (Opc) {
17452 default: break;
17453 case RISCVISD::SELECT_CC: {
17454 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17455 // If we don't know any bits, early out.
17456 if (Known.isUnknown())
17457 break;
17458 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17459
17460 // Only known if known in both the LHS and RHS.
17461 Known = Known.intersectWith(Known2);
17462 break;
17463 }
17464 case RISCVISD::CZERO_EQZ:
17465 case RISCVISD::CZERO_NEZ:
17466 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17467 // Result is either all zero or operand 0. We can propagate zeros, but not
17468 // ones.
17469 Known.One.clearAllBits();
17470 break;
17471 case RISCVISD::REMUW: {
17472 KnownBits Known2;
17473 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17474 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17475 // We only care about the lower 32 bits.
17476 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17477 // Restore the original width by sign extending.
17478 Known = Known.sext(BitWidth);
17479 break;
17480 }
17481 case RISCVISD::DIVUW: {
17482 KnownBits Known2;
17483 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17484 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17485 // We only care about the lower 32 bits.
17486 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17487 // Restore the original width by sign extending.
17488 Known = Known.sext(BitWidth);
17489 break;
17490 }
17491 case RISCVISD::SLLW: {
17492 KnownBits Known2;
17493 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17494 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17495 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17496 // Restore the original width by sign extending.
17497 Known = Known.sext(BitWidth);
17498 break;
17499 }
17500 case RISCVISD::CTZW: {
17501 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17502 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17503 unsigned LowBits = llvm::bit_width(PossibleTZ);
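// The ctzw result is in [0, 32], so only the low bit_width(PossibleTZ) bits
// (at most 6) can ever be set.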
17504 Known.Zero.setBitsFrom(LowBits);
17505 break;
17506 }
17507 case RISCVISD::CLZW: {
17508 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17509 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17510 unsigned LowBits = llvm::bit_width(PossibleLZ);
17511 Known.Zero.setBitsFrom(LowBits);
17512 break;
17513 }
17514 case RISCVISD::BREV8:
17515 case RISCVISD::ORC_B: {
17516 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17517 // control value of 7 is equivalent to brev8 and orc.b.
17518 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17519 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17520 // To compute zeros, we need to invert the value and invert it back after.
17521 Known.Zero =
17522 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17523 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17524 break;
17525 }
17526 case RISCVISD::READ_VLENB: {
17527 // We can use the minimum and maximum VLEN values to bound VLENB. We
17528 // know VLEN must be a power of two.
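// E.g. (illustrative) if the minimum and maximum VLEN are both 128, VLENB is
// exactly 16: bit 4 is known one and every other bit is known zero.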
17529 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17530 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17531 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17532 Known.Zero.setLowBits(Log2_32(MinVLenB));
17533 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17534 if (MaxVLenB == MinVLenB)
17535 Known.One.setBit(Log2_32(MinVLenB));
17536 break;
17537 }
17538 case RISCVISD::FCLASS: {
17539 // fclass will only set one of the low 10 bits.
17540 Known.Zero.setBitsFrom(10);
17541 break;
17542 }
17543 case ISD::INTRINSIC_W_CHAIN:
17544 case ISD::INTRINSIC_WO_CHAIN: {
17545 unsigned IntNo =
17546 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17547 switch (IntNo) {
17548 default:
17549 // We can't do anything for most intrinsics.
17550 break;
17551 case Intrinsic::riscv_vsetvli:
17552 case Intrinsic::riscv_vsetvlimax: {
17553 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17554 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17555 RISCVII::VLMUL VLMUL =
17556 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17557 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17558 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17559 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17560 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
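// E.g. (illustrative) SEW=64 with LMUL=1 and a maximum VLEN of 512 gives
// MaxVL = 8, so all bits of the result from bit 4 upwards are known zero.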
17561
17562 // The result of vsetvli must not be larger than AVL.
17563 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17564 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17565
17566 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17567 if (BitWidth > KnownZeroFirstBit)
17568 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17569 break;
17570 }
17571 }
17572 break;
17573 }
17574 }
17575}
17576
17577 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17578 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17579 unsigned Depth) const {
17580 switch (Op.getOpcode()) {
17581 default:
17582 break;
17583 case RISCVISD::SELECT_CC: {
17584 unsigned Tmp =
17585 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17586 if (Tmp == 1) return 1; // Early out.
17587 unsigned Tmp2 =
17588 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17589 return std::min(Tmp, Tmp2);
17590 }
17591 case RISCVISD::CZERO_EQZ:
17592 case RISCVISD::CZERO_NEZ:
17593 // Output is either all zero or operand 0. We can propagate sign bit count
17594 // from operand 0.
17595 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17596 case RISCVISD::ABSW: {
17597 // We expand this at isel to negw+max. The result will have 33 sign bits
17598 // if the input has at least 33 sign bits.
17599 unsigned Tmp =
17600 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17601 if (Tmp < 33) return 1;
17602 return 33;
17603 }
17604 case RISCVISD::SLLW:
17605 case RISCVISD::SRAW:
17606 case RISCVISD::SRLW:
17607 case RISCVISD::DIVW:
17608 case RISCVISD::DIVUW:
17609 case RISCVISD::REMUW:
17610 case RISCVISD::ROLW:
17611 case RISCVISD::RORW:
17612 case RISCVISD::FCVT_W_RV64:
17613 case RISCVISD::FCVT_WU_RV64:
17614 case RISCVISD::STRICT_FCVT_W_RV64:
17615 case RISCVISD::STRICT_FCVT_WU_RV64:
17616 // TODO: As the result is sign-extended, this is conservatively correct. A
17617 // more precise answer could be calculated for SRAW depending on known
17618 // bits in the shift amount.
17619 return 33;
17620 case RISCVISD::VMV_X_S: {
17621 // The number of sign bits of the scalar result is computed by obtaining the
17622 // element type of the input vector operand, subtracting its width from the
17623 // XLEN, and then adding one (sign bit within the element type). If the
17624 // element type is wider than XLen, the least-significant XLEN bits are
17625 // taken.
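// E.g. (illustrative) reading an i8 element on RV64 gives 64 - 8 + 1 = 57
// known sign bits.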
17626 unsigned XLen = Subtarget.getXLen();
17627 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17628 if (EltBits <= XLen)
17629 return XLen - EltBits + 1;
17630 break;
17631 }
17632 case ISD::INTRINSIC_W_CHAIN: {
17633 unsigned IntNo = Op.getConstantOperandVal(1);
17634 switch (IntNo) {
17635 default:
17636 break;
17637 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17638 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17639 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17640 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17641 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17642 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17643 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17644 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17645 case Intrinsic::riscv_masked_cmpxchg_i64:
17646 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17647 // narrow atomic operation. These are implemented using atomic
17648 // operations at the minimum supported atomicrmw/cmpxchg width whose
17649 // result is then sign extended to XLEN. With +A, the minimum width is
17650 // 32 for both RV32 and RV64.
17651 assert(Subtarget.getXLen() == 64);
17653 assert(Subtarget.hasStdExtA());
17654 return 33;
17655 }
17656 break;
17657 }
17658 }
17659
17660 return 1;
17661}
17662
17663 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17664 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17665 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17666
17667 // TODO: Add more target nodes.
17668 switch (Op.getOpcode()) {
17669 case RISCVISD::SELECT_CC:
17670 // Integer select_cc cannot create poison.
17671 // TODO: What are the FP poison semantics?
17672 // TODO: This instruction blocks poison from the unselected operand, can
17673 // we do anything with that?
17674 return !Op.getValueType().isInteger();
17675 }
17676 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17677 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17678}
17679
17680const Constant *
17681 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17682 assert(Ld && "Unexpected null LoadSDNode");
17683 if (!ISD::isNormalLoad(Ld))
17684 return nullptr;
17685
17686 SDValue Ptr = Ld->getBasePtr();
17687
17688 // Only constant pools with no offset are supported.
17689 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17690 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17691 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17692 CNode->getOffset() != 0)
17693 return nullptr;
17694
17695 return CNode;
17696 };
17697
17698 // Simple case, LLA.
17699 if (Ptr.getOpcode() == RISCVISD::LLA) {
17700 auto *CNode = GetSupportedConstantPool(Ptr);
17701 if (!CNode || CNode->getTargetFlags() != 0)
17702 return nullptr;
17703
17704 return CNode->getConstVal();
17705 }
17706
17707 // Look for a HI and ADD_LO pair.
17708 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17709 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17710 return nullptr;
17711
17712 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17713 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17714
17715 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17716 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17717 return nullptr;
17718
17719 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17720 return nullptr;
17721
17722 return CNodeLo->getConstVal();
17723}
17724
17725 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17726 MachineBasicBlock *BB) {
17727 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17728
17729 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17730 // Should the count have wrapped while it was being read, we need to try
17731 // again.
17732 // For example:
17733 // ```
17734 // read:
17735 // csrrs x3, counterh # load high word of counter
17736 // csrrs x2, counter # load low word of counter
17737 // csrrs x4, counterh # load high word of counter
17738 // bne x3, x4, read # check if high word reads match, otherwise try again
17739 // ```
17740
17741 MachineFunction &MF = *BB->getParent();
17742 const BasicBlock *LLVMBB = BB->getBasicBlock();
17743 MachineFunction::iterator It = ++BB->getIterator();
17744
17745 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17746 MF.insert(It, LoopMBB);
17747
17748 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17749 MF.insert(It, DoneMBB);
17750
17751 // Transfer the remainder of BB and its successor edges to DoneMBB.
17752 DoneMBB->splice(DoneMBB->begin(), BB,
17753 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17754 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17755
17756 BB->addSuccessor(LoopMBB);
17757
17758 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17759 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17760 Register LoReg = MI.getOperand(0).getReg();
17761 Register HiReg = MI.getOperand(1).getReg();
17762 int64_t LoCounter = MI.getOperand(2).getImm();
17763 int64_t HiCounter = MI.getOperand(3).getImm();
17764 DebugLoc DL = MI.getDebugLoc();
17765
17767 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17768 .addImm(HiCounter)
17769 .addReg(RISCV::X0);
17770 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17771 .addImm(LoCounter)
17772 .addReg(RISCV::X0);
17773 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17774 .addImm(HiCounter)
17775 .addReg(RISCV::X0);
17776
17777 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17778 .addReg(HiReg)
17779 .addReg(ReadAgainReg)
17780 .addMBB(LoopMBB);
17781
17782 LoopMBB->addSuccessor(LoopMBB);
17783 LoopMBB->addSuccessor(DoneMBB);
17784
17785 MI.eraseFromParent();
17786
17787 return DoneMBB;
17788}
17789
17790 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17791 MachineBasicBlock *BB,
17792 const RISCVSubtarget &Subtarget) {
17793 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17794
17795 MachineFunction &MF = *BB->getParent();
17796 DebugLoc DL = MI.getDebugLoc();
17799 Register LoReg = MI.getOperand(0).getReg();
17800 Register HiReg = MI.getOperand(1).getReg();
17801 Register SrcReg = MI.getOperand(2).getReg();
17802
17803 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17804 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17805
17806 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17807 RI, Register());
17808 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17809 MachineMemOperand *MMOLo =
17810 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
17811 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17812 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
17813 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17814 .addFrameIndex(FI)
17815 .addImm(0)
17816 .addMemOperand(MMOLo);
17817 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17818 .addFrameIndex(FI)
17819 .addImm(4)
17820 .addMemOperand(MMOHi);
17821 MI.eraseFromParent(); // The pseudo instruction is gone now.
17822 return BB;
17823}
17824
17825 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17826 MachineBasicBlock *BB,
17827 const RISCVSubtarget &Subtarget) {
17828 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17829 "Unexpected instruction");
17830
17831 MachineFunction &MF = *BB->getParent();
17832 DebugLoc DL = MI.getDebugLoc();
17835 Register DstReg = MI.getOperand(0).getReg();
17836 Register LoReg = MI.getOperand(1).getReg();
17837 Register HiReg = MI.getOperand(2).getReg();
17838
17839 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17840 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17841
17842 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17843 MachineMemOperand *MMOLo =
17844 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
17845 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17846 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
17847 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17848 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17849 .addFrameIndex(FI)
17850 .addImm(0)
17851 .addMemOperand(MMOLo);
17852 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17853 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17854 .addFrameIndex(FI)
17855 .addImm(4)
17856 .addMemOperand(MMOHi);
17857 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17858 MI.eraseFromParent(); // The pseudo instruction is gone now.
17859 return BB;
17860}
17861
17862 static bool isSelectPseudo(MachineInstr &MI) {
17863 switch (MI.getOpcode()) {
17864 default:
17865 return false;
17866 case RISCV::Select_GPR_Using_CC_GPR:
17867 case RISCV::Select_GPR_Using_CC_Imm:
17868 case RISCV::Select_FPR16_Using_CC_GPR:
17869 case RISCV::Select_FPR16INX_Using_CC_GPR:
17870 case RISCV::Select_FPR32_Using_CC_GPR:
17871 case RISCV::Select_FPR32INX_Using_CC_GPR:
17872 case RISCV::Select_FPR64_Using_CC_GPR:
17873 case RISCV::Select_FPR64INX_Using_CC_GPR:
17874 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17875 return true;
17876 }
17877}
17878
17879 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17880 unsigned RelOpcode, unsigned EqOpcode,
17881 const RISCVSubtarget &Subtarget) {
17882 DebugLoc DL = MI.getDebugLoc();
17883 Register DstReg = MI.getOperand(0).getReg();
17884 Register Src1Reg = MI.getOperand(1).getReg();
17885 Register Src2Reg = MI.getOperand(2).getReg();
17887 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17889
17890 // Save the current FFLAGS.
17891 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17892
17893 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17894 .addReg(Src1Reg)
17895 .addReg(Src2Reg);
17898
17899 // Restore the FFLAGS.
17900 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17901 .addReg(SavedFFlags, RegState::Kill);
17902
17903 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17904 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17905 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17906 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17909
17910 // Erase the pseudoinstruction.
17911 MI.eraseFromParent();
17912 return BB;
17913}
17914
17915static MachineBasicBlock *
17916 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17917 MachineBasicBlock *ThisMBB,
17918 const RISCVSubtarget &Subtarget) {
17919 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17920 // Without this, custom-inserter would have generated:
17921 //
17922 // A
17923 // | \
17924 // | B
17925 // | /
17926 // C
17927 // | \
17928 // | D
17929 // | /
17930 // E
17931 //
17932 // A: X = ...; Y = ...
17933 // B: empty
17934 // C: Z = PHI [X, A], [Y, B]
17935 // D: empty
17936 // E: PHI [X, C], [Z, D]
17937 //
17938 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17939 //
17940 // A
17941 // | \
17942 // | C
17943 // | /|
17944 // |/ |
17945 // | |
17946 // | D
17947 // | /
17948 // E
17949 //
17950 // A: X = ...; Y = ...
17951 // D: empty
17952 // E: PHI [X, A], [X, C], [Y, D]
17953
17954 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17955 const DebugLoc &DL = First.getDebugLoc();
17956 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17957 MachineFunction *F = ThisMBB->getParent();
17958 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17959 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17960 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17961 MachineFunction::iterator It = ++ThisMBB->getIterator();
17962 F->insert(It, FirstMBB);
17963 F->insert(It, SecondMBB);
17964 F->insert(It, SinkMBB);
17965
17966 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17967 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17969 ThisMBB->end());
17970 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17971
17972 // Fallthrough block for ThisMBB.
17973 ThisMBB->addSuccessor(FirstMBB);
17974 // Fallthrough block for FirstMBB.
17975 FirstMBB->addSuccessor(SecondMBB);
17976 ThisMBB->addSuccessor(SinkMBB);
17977 FirstMBB->addSuccessor(SinkMBB);
17978 // This is fallthrough.
17979 SecondMBB->addSuccessor(SinkMBB);
17980
17981 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17982 Register FLHS = First.getOperand(1).getReg();
17983 Register FRHS = First.getOperand(2).getReg();
17984 // Insert appropriate branch.
17985 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17986 .addReg(FLHS)
17987 .addReg(FRHS)
17988 .addMBB(SinkMBB);
17989
17990 Register SLHS = Second.getOperand(1).getReg();
17991 Register SRHS = Second.getOperand(2).getReg();
17992 Register Op1Reg4 = First.getOperand(4).getReg();
17993 Register Op1Reg5 = First.getOperand(5).getReg();
17994
17995 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17996 // Insert appropriate branch.
17997 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17998 .addReg(SLHS)
17999 .addReg(SRHS)
18000 .addMBB(SinkMBB);
18001
18002 Register DestReg = Second.getOperand(0).getReg();
18003 Register Op2Reg4 = Second.getOperand(4).getReg();
18004 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
18005 .addReg(Op2Reg4)
18006 .addMBB(ThisMBB)
18007 .addReg(Op1Reg4)
18008 .addMBB(FirstMBB)
18009 .addReg(Op1Reg5)
18010 .addMBB(SecondMBB);
18011
18012 // Now remove the Select_FPRX_s.
18013 First.eraseFromParent();
18014 Second.eraseFromParent();
18015 return SinkMBB;
18016}
18017
18020 const RISCVSubtarget &Subtarget) {
18021 // To "insert" Select_* instructions, we actually have to insert the triangle
18022 // control-flow pattern. The incoming instructions know the destination vreg
18023 // to set, the condition code register to branch on, the true/false values to
18024 // select between, and the condcode to use to select the appropriate branch.
18025 //
18026 // We produce the following control flow:
18027 // HeadMBB
18028 // | \
18029 // | IfFalseMBB
18030 // | /
18031 // TailMBB
18032 //
18033 // When we find a sequence of selects we attempt to optimize their emission
18034 // by sharing the control flow. Currently we only handle cases where we have
18035 // multiple selects with the exact same condition (same LHS, RHS and CC).
18036 // The selects may be interleaved with other instructions if the other
18037 // instructions meet some requirements we deem safe:
18038 // - They are debug instructions; these are always safe to interleave.
18039 // - Otherwise, they are not pseudo instructions, do not have side-effects,
18040 // do not access memory, and their inputs do not depend on the results of
18041 // the select pseudo-instructions.
18042 // The TrueV/FalseV operands of the selects cannot depend on the result of
18043 // previous selects in the sequence.
18044 // These conditions could be further relaxed. See the X86 target for a
18045 // related approach and more information.
18046 //
18047 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18048 // is checked here and handled by a separate function -
18049 // EmitLoweredCascadedSelect.
18050
18051 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
18052 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
18053 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
18054 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
18055 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
18056 Next->getOperand(5).isKill())
18057 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
18058
18059 Register LHS = MI.getOperand(1).getReg();
18060 Register RHS;
18061 if (MI.getOperand(2).isReg())
18062 RHS = MI.getOperand(2).getReg();
18063 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
18064
18065 SmallVector<MachineInstr *, 4> SelectDebugValues;
18066 SmallSet<Register, 4> SelectDests;
18067 SelectDests.insert(MI.getOperand(0).getReg());
18068
18069 MachineInstr *LastSelectPseudo = &MI;
18070 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
18071 SequenceMBBI != E; ++SequenceMBBI) {
18072 if (SequenceMBBI->isDebugInstr())
18073 continue;
18074 if (isSelectPseudo(*SequenceMBBI)) {
18075 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
18076 !SequenceMBBI->getOperand(2).isReg() ||
18077 SequenceMBBI->getOperand(2).getReg() != RHS ||
18078 SequenceMBBI->getOperand(3).getImm() != CC ||
18079 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
18080 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
18081 break;
18082 LastSelectPseudo = &*SequenceMBBI;
18083 SequenceMBBI->collectDebugValues(SelectDebugValues);
18084 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
18085 continue;
18086 }
18087 if (SequenceMBBI->hasUnmodeledSideEffects() ||
18088 SequenceMBBI->mayLoadOrStore() ||
18089 SequenceMBBI->usesCustomInsertionHook())
18090 break;
18091 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
18092 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
18093 }))
18094 break;
18095 }
18096
18097 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18098 const BasicBlock *LLVM_BB = BB->getBasicBlock();
18099 DebugLoc DL = MI.getDebugLoc();
18100 MachineFunction::iterator I = ++BB->getIterator();
18101
18102 MachineBasicBlock *HeadMBB = BB;
18103 MachineFunction *F = BB->getParent();
18104 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
18105 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
18106
18107 F->insert(I, IfFalseMBB);
18108 F->insert(I, TailMBB);
18109
18110 // Set the call frame size on entry to the new basic blocks.
18111 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
18112 IfFalseMBB->setCallFrameSize(CallFrameSize);
18113 TailMBB->setCallFrameSize(CallFrameSize);
18114
18115 // Transfer debug instructions associated with the selects to TailMBB.
18116 for (MachineInstr *DebugInstr : SelectDebugValues) {
18117 TailMBB->push_back(DebugInstr->removeFromParent());
18118 }
18119
18120 // Move all instructions after the sequence to TailMBB.
18121 TailMBB->splice(TailMBB->end(), HeadMBB,
18122 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
18123 // Update machine-CFG edges by transferring all successors of the current
18124 // block to the new block which will contain the Phi nodes for the selects.
18125 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
18126 // Set the successors for HeadMBB.
18127 HeadMBB->addSuccessor(IfFalseMBB);
18128 HeadMBB->addSuccessor(TailMBB);
18129
18130 // Insert appropriate branch.
18131 if (MI.getOperand(2).isImm())
18132 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
18133 .addReg(LHS)
18134 .addImm(MI.getOperand(2).getImm())
18135 .addMBB(TailMBB);
18136 else
18137 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
18138 .addReg(LHS)
18139 .addReg(RHS)
18140 .addMBB(TailMBB);
18141
18142 // IfFalseMBB just falls through to TailMBB.
18143 IfFalseMBB->addSuccessor(TailMBB);
18144
18145 // Create PHIs for all of the select pseudo-instructions.
18146 auto SelectMBBI = MI.getIterator();
18147 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
18148 auto InsertionPoint = TailMBB->begin();
18149 while (SelectMBBI != SelectEnd) {
18150 auto Next = std::next(SelectMBBI);
18151 if (isSelectPseudo(*SelectMBBI)) {
18152 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
18153 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
18154 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
18155 .addReg(SelectMBBI->getOperand(4).getReg())
18156 .addMBB(HeadMBB)
18157 .addReg(SelectMBBI->getOperand(5).getReg())
18158 .addMBB(IfFalseMBB);
18159 SelectMBBI->eraseFromParent();
18160 }
18161 SelectMBBI = Next;
18162 }
18163
18164 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
18165 return TailMBB;
18166}
18167
18168// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
18169static const RISCV::RISCVMaskedPseudoInfo *
18170lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
18171 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
18172 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
18173 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
18174 const RISCV::RISCVMaskedPseudoInfo *Masked =
18175 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
18176 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
18177 return Masked;
18178}
18179
18180 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
18181 MachineBasicBlock *BB,
18182 unsigned CVTXOpc) {
18183 DebugLoc DL = MI.getDebugLoc();
18184
18186
18188 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18189
18190 // Save the old value of FFLAGS.
18191 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
18192
18193 assert(MI.getNumOperands() == 7);
18194
18195 // Emit a VFCVT_X_F
18196 const TargetRegisterInfo *TRI =
18198 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
18199 Register Tmp = MRI.createVirtualRegister(RC);
18200 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
18201 .add(MI.getOperand(1))
18202 .add(MI.getOperand(2))
18203 .add(MI.getOperand(3))
18204 .add(MachineOperand::CreateImm(7)) // frm = DYN
18205 .add(MI.getOperand(4))
18206 .add(MI.getOperand(5))
18207 .add(MI.getOperand(6))
18208 .add(MachineOperand::CreateReg(RISCV::FRM,
18209 /*IsDef*/ false,
18210 /*IsImp*/ true));
18211
18212 // Emit a VFCVT_F_X
18213 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
18214 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
18215 // There is no E8 variant for VFCVT_F_X.
18216 assert(Log2SEW >= 4);
18217 unsigned CVTFOpc =
18218 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
18219 ->MaskedPseudo;
18220
18221 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
18222 .add(MI.getOperand(0))
18223 .add(MI.getOperand(1))
18224 .addReg(Tmp)
18225 .add(MI.getOperand(3))
18226 .add(MachineOperand::CreateImm(7)) // frm = DYN
18227 .add(MI.getOperand(4))
18228 .add(MI.getOperand(5))
18229 .add(MI.getOperand(6))
18230 .add(MachineOperand::CreateReg(RISCV::FRM,
18231 /*IsDef*/ false,
18232 /*IsImp*/ true));
18233
18234 // Restore FFLAGS.
18235 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18236 .addReg(SavedFFLAGS, RegState::Kill);
18237
18238 // Erase the pseudoinstruction.
18239 MI.eraseFromParent();
18240 return BB;
18241}
18242
18243 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
18244 const RISCVSubtarget &Subtarget) {
18245 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
18246 const TargetRegisterClass *RC;
18247 switch (MI.getOpcode()) {
18248 default:
18249 llvm_unreachable("Unexpected opcode");
18250 case RISCV::PseudoFROUND_H:
18251 CmpOpc = RISCV::FLT_H;
18252 F2IOpc = RISCV::FCVT_W_H;
18253 I2FOpc = RISCV::FCVT_H_W;
18254 FSGNJOpc = RISCV::FSGNJ_H;
18255 FSGNJXOpc = RISCV::FSGNJX_H;
18256 RC = &RISCV::FPR16RegClass;
18257 break;
18258 case RISCV::PseudoFROUND_H_INX:
18259 CmpOpc = RISCV::FLT_H_INX;
18260 F2IOpc = RISCV::FCVT_W_H_INX;
18261 I2FOpc = RISCV::FCVT_H_W_INX;
18262 FSGNJOpc = RISCV::FSGNJ_H_INX;
18263 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18264 RC = &RISCV::GPRF16RegClass;
18265 break;
18266 case RISCV::PseudoFROUND_S:
18267 CmpOpc = RISCV::FLT_S;
18268 F2IOpc = RISCV::FCVT_W_S;
18269 I2FOpc = RISCV::FCVT_S_W;
18270 FSGNJOpc = RISCV::FSGNJ_S;
18271 FSGNJXOpc = RISCV::FSGNJX_S;
18272 RC = &RISCV::FPR32RegClass;
18273 break;
18274 case RISCV::PseudoFROUND_S_INX:
18275 CmpOpc = RISCV::FLT_S_INX;
18276 F2IOpc = RISCV::FCVT_W_S_INX;
18277 I2FOpc = RISCV::FCVT_S_W_INX;
18278 FSGNJOpc = RISCV::FSGNJ_S_INX;
18279 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18280 RC = &RISCV::GPRF32RegClass;
18281 break;
18282 case RISCV::PseudoFROUND_D:
18283 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18284 CmpOpc = RISCV::FLT_D;
18285 F2IOpc = RISCV::FCVT_L_D;
18286 I2FOpc = RISCV::FCVT_D_L;
18287 FSGNJOpc = RISCV::FSGNJ_D;
18288 FSGNJXOpc = RISCV::FSGNJX_D;
18289 RC = &RISCV::FPR64RegClass;
18290 break;
18291 case RISCV::PseudoFROUND_D_INX:
18292 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18293 CmpOpc = RISCV::FLT_D_INX;
18294 F2IOpc = RISCV::FCVT_L_D_INX;
18295 I2FOpc = RISCV::FCVT_D_L_INX;
18296 FSGNJOpc = RISCV::FSGNJ_D_INX;
18297 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18298 RC = &RISCV::GPRRegClass;
18299 break;
18300 }
18301
18302 const BasicBlock *BB = MBB->getBasicBlock();
18303 DebugLoc DL = MI.getDebugLoc();
18304 MachineFunction::iterator I = ++MBB->getIterator();
18305
18306 MachineFunction *F = MBB->getParent();
18307 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18308 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18309
18310 F->insert(I, CvtMBB);
18311 F->insert(I, DoneMBB);
18312 // Move all instructions after the sequence to DoneMBB.
18313 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18314 MBB->end());
18315 // Update machine-CFG edges by transferring all successors of the current
18316 // block to the new block which will contain the Phi nodes for the selects.
18317 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
18318 // Set the successors for MBB.
18319 MBB->addSuccessor(CvtMBB);
18320 MBB->addSuccessor(DoneMBB);
18321
18322 Register DstReg = MI.getOperand(0).getReg();
18323 Register SrcReg = MI.getOperand(1).getReg();
18324 Register MaxReg = MI.getOperand(2).getReg();
18325 int64_t FRM = MI.getOperand(3).getImm();
18326
18327 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18329
18330 Register FabsReg = MRI.createVirtualRegister(RC);
18331 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18332
18333 // Compare the FP value to the max value.
18334 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18335 auto MIB =
18336 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18339
18340 // Insert branch.
18341 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18342 .addReg(CmpReg)
18343 .addReg(RISCV::X0)
18344 .addMBB(DoneMBB);
18345
18346 CvtMBB->addSuccessor(DoneMBB);
18347
18348 // Convert to integer.
18349 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18350 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18353
18354 // Convert back to FP.
18355 Register I2FReg = MRI.createVirtualRegister(RC);
18356 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18359
18360 // Restore the sign bit.
18361 Register CvtReg = MRI.createVirtualRegister(RC);
18362 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18363
18364 // Merge the results.
18365 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18366 .addReg(SrcReg)
18367 .addMBB(MBB)
18368 .addReg(CvtReg)
18369 .addMBB(CvtMBB);
18370
18371 MI.eraseFromParent();
18372 return DoneMBB;
18373}
18374
18375 MachineBasicBlock *
18376 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18377 MachineBasicBlock *BB) const {
18378 switch (MI.getOpcode()) {
18379 default:
18380 llvm_unreachable("Unexpected instr type to insert");
18381 case RISCV::ReadCounterWide:
18382 assert(!Subtarget.is64Bit() &&
18383 "ReadCounterWide is only to be used on riscv32");
18384 return emitReadCounterWidePseudo(MI, BB);
18385 case RISCV::Select_GPR_Using_CC_GPR:
18386 case RISCV::Select_GPR_Using_CC_Imm:
18387 case RISCV::Select_FPR16_Using_CC_GPR:
18388 case RISCV::Select_FPR16INX_Using_CC_GPR:
18389 case RISCV::Select_FPR32_Using_CC_GPR:
18390 case RISCV::Select_FPR32INX_Using_CC_GPR:
18391 case RISCV::Select_FPR64_Using_CC_GPR:
18392 case RISCV::Select_FPR64INX_Using_CC_GPR:
18393 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18394 return emitSelectPseudo(MI, BB, Subtarget);
18395 case RISCV::BuildPairF64Pseudo:
18396 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18397 case RISCV::SplitF64Pseudo:
18398 return emitSplitF64Pseudo(MI, BB, Subtarget);
18399 case RISCV::PseudoQuietFLE_H:
18400 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18401 case RISCV::PseudoQuietFLE_H_INX:
18402 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18403 case RISCV::PseudoQuietFLT_H:
18404 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18405 case RISCV::PseudoQuietFLT_H_INX:
18406 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18407 case RISCV::PseudoQuietFLE_S:
18408 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18409 case RISCV::PseudoQuietFLE_S_INX:
18410 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18411 case RISCV::PseudoQuietFLT_S:
18412 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18413 case RISCV::PseudoQuietFLT_S_INX:
18414 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18415 case RISCV::PseudoQuietFLE_D:
18416 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18417 case RISCV::PseudoQuietFLE_D_INX:
18418 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18419 case RISCV::PseudoQuietFLE_D_IN32X:
18420 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18421 Subtarget);
18422 case RISCV::PseudoQuietFLT_D:
18423 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18424 case RISCV::PseudoQuietFLT_D_INX:
18425 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18426 case RISCV::PseudoQuietFLT_D_IN32X:
18427 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18428 Subtarget);
18429
18430 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18431 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18432 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18433 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18434 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18435 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18436 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18437 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18438 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18439 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18440 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18441 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18442 case RISCV::PseudoFROUND_H:
18443 case RISCV::PseudoFROUND_H_INX:
18444 case RISCV::PseudoFROUND_S:
18445 case RISCV::PseudoFROUND_S_INX:
18446 case RISCV::PseudoFROUND_D:
18447 case RISCV::PseudoFROUND_D_INX:
18448 case RISCV::PseudoFROUND_D_IN32X:
18449 return emitFROUND(MI, BB, Subtarget);
18450 case TargetOpcode::STATEPOINT:
18451 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
18452 // while the jal call instruction (to which the statepoint is lowered at
18453 // the end) has an implicit def. This def is early-clobber as it is set at
18454 // the moment of the call, earlier than any use is read.
18455 // Add this implicit dead def here as a workaround.
18456 MI.addOperand(*MI.getMF(),
18457 MachineOperand::CreateReg(
18458 RISCV::X1, /*isDef*/ true,
18459 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18460 /*isUndef*/ false, /*isEarlyClobber*/ true));
18461 [[fallthrough]];
18462 case TargetOpcode::STACKMAP:
18463 case TargetOpcode::PATCHPOINT:
18464 if (!Subtarget.is64Bit())
18465 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18466 "supported on 64-bit targets");
18467 return emitPatchPoint(MI, BB);
18468 }
18469}
18470
18471 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18472 SDNode *Node) const {
18473 // Add FRM dependency to any instructions with dynamic rounding mode.
18474 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18475 if (Idx < 0) {
18476 // Vector pseudos have FRM index indicated by TSFlags.
18477 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18478 if (Idx < 0)
18479 return;
18480 }
18481 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18482 return;
18483 // If the instruction already reads FRM, don't add another read.
18484 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18485 return;
18486 MI.addOperand(
18487 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18488}
18489
18490// Calling Convention Implementation.
18491// The expectations for frontend ABI lowering vary from target to target.
18492// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18493// details, but this is a longer term goal. For now, we simply try to keep the
18494// role of the frontend as simple and well-defined as possible. The rules can
18495// be summarised as:
18496// * Never split up large scalar arguments. We handle them here.
18497// * If a hardfloat calling convention is being used, and the struct may be
18498// passed in a pair of registers (fp+fp, int+fp), and both registers are
18499// available, then pass as two separate arguments. If either the GPRs or FPRs
18500// are exhausted, then pass according to the rule below.
18501// * If a struct could never be passed in registers or directly in a stack
18502// slot (as it is larger than 2*XLEN and the floating point rules don't
18503// apply), then pass it using a pointer with the byval attribute.
18504// * If a struct is less than 2*XLEN, then coerce to either a two-element
18505// word-sized array or a 2*XLEN scalar (depending on alignment).
18506// * The frontend can determine whether a struct is returned by reference or
18507// not based on its size and fields. If it will be returned by reference, the
18508// frontend must modify the prototype so a pointer with the sret annotation is
18509// passed as the first argument. This is not necessary for large scalar
18510// returns.
18511// * Struct return values and varargs should be coerced to structs containing
18512// register-size fields in the same situations they would be for fixed
18513// arguments.
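// As a rough illustration of the hard-float rule: under ILP32D/LP64D, a
// struct holding one double and one int is normally passed as two separate
// arguments (an FPR for the double, a GPR for the int) while both register
// classes have space; once either class is exhausted it is passed according
// to the integer rules above.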
18514
18515static const MCPhysReg ArgFPR16s[] = {
18516 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18517 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18518};
18519static const MCPhysReg ArgFPR32s[] = {
18520 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18521 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18522};
18523static const MCPhysReg ArgFPR64s[] = {
18524 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18525 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18526};
18527// This is an interim calling convention and it may be changed in the future.
18528static const MCPhysReg ArgVRs[] = {
18529 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18530 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18531 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18532static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18533 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18534 RISCV::V20M2, RISCV::V22M2};
18535static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18536 RISCV::V20M4};
18537static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18538
18539 static ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI) {
18540 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18541 // the ILP32E ABI.
18542 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18543 RISCV::X13, RISCV::X14, RISCV::X15,
18544 RISCV::X16, RISCV::X17};
18545 // The GPRs used for passing arguments in the ILP32E/LP64E ABI.
18546 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18547 RISCV::X13, RISCV::X14, RISCV::X15};
18548
18549 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18550 return ArrayRef(ArgEGPRs);
18551
18552 return ArrayRef(ArgIGPRs);
18553}
18554
18555 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18556 // The GPRs used for passing arguments in the FastCC. X5 and X6 might be
18557 // used for the save-restore libcall, so we don't use them.
18558 // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
18559 static const MCPhysReg FastCCIGPRs[] = {
18560 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
18561 RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};
18562
18563 // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
18564 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18565 RISCV::X13, RISCV::X14, RISCV::X15};
18566
18567 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18568 return ArrayRef(FastCCEGPRs);
18569
18570 return ArrayRef(FastCCIGPRs);
18571}
18572
18573// Pass a 2*XLEN argument that has been split into two XLEN values through
18574// registers or the stack as necessary.
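// E.g. (illustrative) an i64 argument on RV32: both halves go in GPRs when
// two are free, the low half takes the last GPR and the high half goes on
// the stack when only one is free, and otherwise both halves are passed on
// the stack.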
18575static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18576 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18577 MVT ValVT2, MVT LocVT2,
18578 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18579 unsigned XLenInBytes = XLen / 8;
18580 const RISCVSubtarget &STI =
18581 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18582 ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(STI.getTargetABI());
18583
18584 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18585 // At least one half can be passed via register.
18586 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18587 VA1.getLocVT(), CCValAssign::Full));
18588 } else {
18589 // Both halves must be passed on the stack, with proper alignment.
18590 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18591 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18592 Align StackAlign(XLenInBytes);
18593 if (!EABI || XLen != 32)
18594 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18595 State.addLoc(
18596 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18597 State.AllocateStack(XLenInBytes, StackAlign),
18598 VA1.getLocVT(), CCValAssign::Full));
18599 State.addLoc(CCValAssign::getMem(
18600 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18601 LocVT2, CCValAssign::Full));
18602 return false;
18603 }
18604
18605 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18606 // The second half can also be passed via register.
18607 State.addLoc(
18608 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18609 } else {
18610 // The second half is passed via the stack, without additional alignment.
18611 State.addLoc(CCValAssign::getMem(
18612 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18613 LocVT2, CCValAssign::Full));
18614 }
18615
18616 return false;
18617}
18618
18619// Implements the RISC-V calling convention. Returns true upon failure.
18620bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18621 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18622 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18623 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18624 RVVArgDispatcher &RVVDispatcher) {
18625 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18626 assert(XLen == 32 || XLen == 64);
18627 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18628
18629 // Static chain parameter must not be passed in normal argument registers,
18630 // so we assign t2 for it, as done in GCC's __builtin_call_with_static_chain.
18631 if (ArgFlags.isNest()) {
18632 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18633 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18634 return false;
18635 }
18636 }
18637
18638 // Any return value split in to more than two values can't be returned
18639 // directly. Vectors are returned via the available vector registers.
18640 if (!LocVT.isVector() && IsRet && ValNo > 1)
18641 return true;
18642
18643 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18644 // variadic argument, or if no F16/F32 argument registers are available.
18645 bool UseGPRForF16_F32 = true;
18646 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18647 // variadic argument, or if no F64 argument registers are available.
18648 bool UseGPRForF64 = true;
18649
18650 switch (ABI) {
18651 default:
18652 llvm_unreachable("Unexpected ABI");
18653 case RISCVABI::ABI_ILP32:
18654 case RISCVABI::ABI_ILP32E:
18655 case RISCVABI::ABI_LP64:
18656 case RISCVABI::ABI_LP64E:
18657 break;
18658 case RISCVABI::ABI_ILP32F:
18659 case RISCVABI::ABI_LP64F:
18660 UseGPRForF16_F32 = !IsFixed;
18661 break;
18662 case RISCVABI::ABI_ILP32D:
18663 case RISCVABI::ABI_LP64D:
18664 UseGPRForF16_F32 = !IsFixed;
18665 UseGPRForF64 = !IsFixed;
18666 break;
18667 }
18668
18669 // FPR16, FPR32, and FPR64 alias each other.
18670 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18671 UseGPRForF16_F32 = true;
18672 UseGPRForF64 = true;
18673 }
18674
18675 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18676 // similar local variables rather than directly checking against the target
18677 // ABI.
18678
18679 if (UseGPRForF16_F32 &&
18680 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18681 LocVT = XLenVT;
18682 LocInfo = CCValAssign::BCvt;
18683 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18684 LocVT = MVT::i64;
18685 LocInfo = CCValAssign::BCvt;
18686 }
18687
18687
18688 ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(ABI);
18689
18690 // If this is a variadic argument, the RISC-V calling convention requires
18691 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18692 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18693 // be used regardless of whether the original argument was split during
18694 // legalisation or not. The argument will not be passed by registers if the
18695 // original type is larger than 2*XLEN, so the register alignment rule does
18696 // not apply.
18697 // TODO: To be compatible with GCC's behaviors, we don't align registers
18698 // currently if we are using ILP32E calling convention. This behavior may be
18699 // changed when RV32E/ILP32E is ratified.
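// E.g. (illustrative) on RV32 with a non-E ABI, a variadic double whose
// lowering would otherwise start in a3 skips a3 and is passed in the aligned
// pair a4/a5 instead.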
18700 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18701 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18702 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18703 ABI != RISCVABI::ABI_ILP32E) {
18704 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18705 // Skip 'odd' register if necessary.
18706 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18707 State.AllocateReg(ArgGPRs);
18708 }
18709
18710 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18711 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18712 State.getPendingArgFlags();
18713
18714 assert(PendingLocs.size() == PendingArgFlags.size() &&
18715 "PendingLocs and PendingArgFlags out of sync");
18716
18717 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18718 // registers are exhausted.
18719 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18720 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18721 // Depending on available argument GPRs, f64 may be passed in a pair of
18722 // GPRs, split between a GPR and the stack, or passed completely on the
18723 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18724 // cases.
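    // For example, with seven preceding integer arguments occupying a0-a6, the
    // f64's low word is assigned to a7 below and its high word spills to a
    // 4-byte stack slot.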
18725 Register Reg = State.AllocateReg(ArgGPRs);
18726 if (!Reg) {
18727 unsigned StackOffset = State.AllocateStack(8, Align(8));
18728 State.addLoc(
18729 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18730 return false;
18731 }
18732 LocVT = MVT::i32;
18733 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18734 Register HiReg = State.AllocateReg(ArgGPRs);
18735 if (HiReg) {
18736 State.addLoc(
18737 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18738 } else {
18739 unsigned StackOffset = State.AllocateStack(4, Align(4));
18740 State.addLoc(
18741 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18742 }
18743 return false;
18744 }
18745
18746 // Fixed-length vectors are located in the corresponding scalable-vector
18747 // container types.
18748 if (ValVT.isFixedLengthVector())
18749 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18750
18751 // Split arguments might be passed indirectly, so keep track of the pending
18752 // values. Split vectors are passed via a mix of registers and indirectly, so
18753 // treat them as we would any other argument.
18754 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18755 LocVT = XLenVT;
18756 LocInfo = CCValAssign::Indirect;
18757 PendingLocs.push_back(
18758 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18759 PendingArgFlags.push_back(ArgFlags);
18760 if (!ArgFlags.isSplitEnd()) {
18761 return false;
18762 }
18763 }
18764
18765 // If the split argument only had two elements, it should be passed directly
18766 // in registers or on the stack.
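  // For example, an i64 argument on RV32 legalises to two i32 halves; rather
  // than going indirect, both pending halves are handed to CC_RISCVAssign2XLen,
  // which places them in two GPRs, one GPR plus a stack slot, or fully on the
  // stack.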
18767 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18768 PendingLocs.size() <= 2) {
18769 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18770 // Apply the normal calling convention rules to the first half of the
18771 // split argument.
18772 CCValAssign VA = PendingLocs[0];
18773 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18774 PendingLocs.clear();
18775 PendingArgFlags.clear();
18776 return CC_RISCVAssign2XLen(
18777 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18778 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18779 }
18780
18781 // Allocate to a register if possible, or else a stack slot.
18782 Register Reg;
18783 unsigned StoreSizeBytes = XLen / 8;
18784 Align StackAlign = Align(XLen / 8);
18785
18786 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18787 Reg = State.AllocateReg(ArgFPR16s);
18788 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18789 Reg = State.AllocateReg(ArgFPR32s);
18790 else if (ValVT == MVT::f64 && !UseGPRForF64)
18791 Reg = State.AllocateReg(ArgFPR64s);
18792 else if (ValVT.isVector()) {
18793 Reg = RVVDispatcher.getNextPhysReg();
18794 if (!Reg) {
18795 // For return values, the vector must be passed fully via registers or
18796 // via the stack.
18797 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18798 // but we're using all of them.
18799 if (IsRet)
18800 return true;
18801 // Try using a GPR to pass the address
18802 if ((Reg = State.AllocateReg(ArgGPRs))) {
18803 LocVT = XLenVT;
18804 LocInfo = CCValAssign::Indirect;
18805 } else if (ValVT.isScalableVector()) {
18806 LocVT = XLenVT;
18807 LocInfo = CCValAssign::Indirect;
18808 } else {
18809 // Pass fixed-length vectors on the stack.
18810 LocVT = ValVT;
18811 StoreSizeBytes = ValVT.getStoreSize();
18812 // Align vectors to their element sizes, being careful for vXi1
18813 // vectors.
18814 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18815 }
18816 }
18817 } else {
18818 Reg = State.AllocateReg(ArgGPRs);
18819 }
18820
18821 unsigned StackOffset =
18822 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18823
18824 // If we reach this point and PendingLocs is non-empty, we must be at the
18825 // end of a split argument that must be passed indirectly.
18826 if (!PendingLocs.empty()) {
18827 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18828 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18829
18830 for (auto &It : PendingLocs) {
18831 if (Reg)
18832 It.convertToReg(Reg);
18833 else
18834 It.convertToMem(StackOffset);
18835 State.addLoc(It);
18836 }
18837 PendingLocs.clear();
18838 PendingArgFlags.clear();
18839 return false;
18840 }
18841
18842 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18843 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18844 "Expected an XLenVT or vector types at this stage");
18845
18846 if (Reg) {
18847 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18848 return false;
18849 }
18850
18851 // When a scalar floating-point value is passed on the stack, no
18852 // bit-conversion is needed.
18853 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18854 assert(!ValVT.isVector());
18855 LocVT = ValVT;
18856 LocInfo = CCValAssign::Full;
18857 }
18858 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18859 return false;
18860}
18861
18862template <typename ArgTy>
18863static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18864 for (const auto &ArgIdx : enumerate(Args)) {
18865 MVT ArgVT = ArgIdx.value().VT;
18866 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18867 return ArgIdx.index();
18868 }
18869 return std::nullopt;
18870}
18871
18872void RISCVTargetLowering::analyzeInputArgs(
18873 MachineFunction &MF, CCState &CCInfo,
18874 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18875 RISCVCCAssignFn Fn) const {
18876 unsigned NumArgs = Ins.size();
18877  FunctionType *FType = MF.getFunction().getFunctionType();
18878
18879 RVVArgDispatcher Dispatcher;
18880 if (IsRet) {
18881 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18882 } else {
18883 SmallVector<Type *, 4> TypeList;
18884 for (const Argument &Arg : MF.getFunction().args())
18885 TypeList.push_back(Arg.getType());
18886 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18887 }
18888
18889 for (unsigned i = 0; i != NumArgs; ++i) {
18890 MVT ArgVT = Ins[i].VT;
18891 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18892
18893 Type *ArgTy = nullptr;
18894 if (IsRet)
18895 ArgTy = FType->getReturnType();
18896 else if (Ins[i].isOrigArg())
18897 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18898
18899    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18900 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18901 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18902 Dispatcher)) {
18903 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18904 << ArgVT << '\n');
18905 llvm_unreachable(nullptr);
18906 }
18907 }
18908}
18909
18910void RISCVTargetLowering::analyzeOutputArgs(
18911 MachineFunction &MF, CCState &CCInfo,
18912 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18913 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18914 unsigned NumArgs = Outs.size();
18915
18916 SmallVector<Type *, 4> TypeList;
18917 if (IsRet)
18918 TypeList.push_back(MF.getFunction().getReturnType());
18919 else if (CLI)
18920 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18921 TypeList.push_back(Arg.Ty);
18922 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18923
18924 for (unsigned i = 0; i != NumArgs; i++) {
18925 MVT ArgVT = Outs[i].VT;
18926 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18927 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18928
18930 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18931 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18932 Dispatcher)) {
18933 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18934 << ArgVT << "\n");
18935 llvm_unreachable(nullptr);
18936 }
18937 }
18938}
18939
18940// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18941// values.
18942static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18943                                   const CCValAssign &VA, const SDLoc &DL,
18944 const RISCVSubtarget &Subtarget) {
18945 switch (VA.getLocInfo()) {
18946 default:
18947 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18948 case CCValAssign::Full:
18949    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18950      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18951 break;
18952 case CCValAssign::BCvt:
18953 if (VA.getLocVT().isInteger() &&
18954 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18955 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18956 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18957 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18958 } else {
18959 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18960 }
18961 break;
18962 }
18963 return Val;
18964}
18965
18966// The caller is responsible for loading the full value if the argument is
18967// passed with CCValAssign::Indirect.
18968static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18969                                const CCValAssign &VA, const SDLoc &DL,
18970 const ISD::InputArg &In,
18971 const RISCVTargetLowering &TLI) {
18972  MachineFunction &MF = DAG.getMachineFunction();
18973  MachineRegisterInfo &RegInfo = MF.getRegInfo();
18974  EVT LocVT = VA.getLocVT();
18975 SDValue Val;
18976 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18977 Register VReg = RegInfo.createVirtualRegister(RC);
18978 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18979 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18980
18981 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18982 if (In.isOrigArg()) {
18983 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18984 if (OrigArg->getType()->isIntegerTy()) {
18985 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18986 // An input zero extended from i31 can also be considered sign extended.
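      // For example, an i8 argument zero-extended to i32 has bits 8-31 clear,
      // so bit 31 is zero and the value is equally valid as a sign-extended
      // 32-bit quantity.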
18987 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18988 (BitWidth < 32 && In.Flags.isZExt())) {
18989        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18990        RVFI->addSExt32Register(VReg);
18991 }
18992 }
18993 }
18994
18995  if (VA.getLocInfo() == CCValAssign::Indirect)
18996    return Val;
18997
18998 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18999}
19000
19001static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
19002                                   const CCValAssign &VA, const SDLoc &DL,
19003 const RISCVSubtarget &Subtarget) {
19004 EVT LocVT = VA.getLocVT();
19005
19006 switch (VA.getLocInfo()) {
19007 default:
19008 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19009 case CCValAssign::Full:
19010 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19011 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
19012 break;
19013 case CCValAssign::BCvt:
19014 if (LocVT.isInteger() &&
19015 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
19016 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19017 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
19018 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19019 } else {
19020 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19021 }
19022 break;
19023 }
19024 return Val;
19025}
19026
19027// The caller is responsible for loading the full value if the argument is
19028// passed with CCValAssign::Indirect.
19029static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
19030                                const CCValAssign &VA, const SDLoc &DL) {
19031  MachineFunction &MF = DAG.getMachineFunction();
19032  MachineFrameInfo &MFI = MF.getFrameInfo();
19033 EVT LocVT = VA.getLocVT();
19034 EVT ValVT = VA.getValVT();
19035  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
19036  if (ValVT.isScalableVector()) {
19037 // When the value is a scalable vector, we save the pointer which points to
19038 // the scalable vector value in the stack. The ValVT will be the pointer
19039 // type, instead of the scalable vector type.
19040 ValVT = LocVT;
19041 }
19042 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19043 /*IsImmutable=*/true);
19044 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19045 SDValue Val;
19046
19047 ISD::LoadExtType ExtType;
19048 switch (VA.getLocInfo()) {
19049 default:
19050 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19051 case CCValAssign::Full:
19052  case CCValAssign::Indirect:
19053  case CCValAssign::BCvt:
19054 ExtType = ISD::NON_EXTLOAD;
19055 break;
19056 }
19057 Val = DAG.getExtLoad(
19058 ExtType, DL, LocVT, Chain, FIN,
19059      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
19060  return Val;
19061}
19062
19063static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19064                                       const CCValAssign &VA,
19065 const CCValAssign &HiVA,
19066 const SDLoc &DL) {
19067 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19068 "Unexpected VA");
19069  MachineFunction &MF = DAG.getMachineFunction();
19070  MachineFrameInfo &MFI = MF.getFrameInfo();
19071  MachineRegisterInfo &RegInfo = MF.getRegInfo();
19072
19073 assert(VA.isRegLoc() && "Expected register VA assignment");
19074
19075 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19076 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19077 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19078 SDValue Hi;
19079 if (HiVA.isMemLoc()) {
19080 // Second half of f64 is passed on the stack.
19081 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19082 /*IsImmutable=*/true);
19083 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19084 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19085                     MachinePointerInfo::getFixedStack(MF, FI));
19086  } else {
19087 // Second half of f64 is passed in another GPR.
19088 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19089 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19090 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19091 }
19092 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19093}
19094
19095// FastCC yields less than a 1% performance improvement on some particular
19096// benchmarks, but it may theoretically benefit other cases.
19097bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
19098                            unsigned ValNo, MVT ValVT, MVT LocVT,
19099 CCValAssign::LocInfo LocInfo,
19100 ISD::ArgFlagsTy ArgFlags, CCState &State,
19101 bool IsFixed, bool IsRet, Type *OrigTy,
19102 const RISCVTargetLowering &TLI,
19103 RVVArgDispatcher &RVVDispatcher) {
19104 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19105 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19106 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19107 return false;
19108 }
19109 }
19110
19111 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
19112
19113 if (LocVT == MVT::f16 &&
19114 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
19115 static const MCPhysReg FPR16List[] = {
19116 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
19117 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
19118 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
19119 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
19120 if (unsigned Reg = State.AllocateReg(FPR16List)) {
19121 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19122 return false;
19123 }
19124 }
19125
19126 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19127 static const MCPhysReg FPR32List[] = {
19128 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
19129 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
19130 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
19131 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
19132 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19133 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19134 return false;
19135 }
19136 }
19137
19138 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19139 static const MCPhysReg FPR64List[] = {
19140 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
19141 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
19142 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
19143 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
19144 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19145 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19146 return false;
19147 }
19148 }
19149
19150 // Check if there is an available GPR before hitting the stack.
19151 if ((LocVT == MVT::f16 &&
19152 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
19153 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19154 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
19155 Subtarget.hasStdExtZdinx())) {
19156 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19157 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19158 return false;
19159 }
19160 }
19161
19162 if (LocVT == MVT::f16) {
19163 unsigned Offset2 = State.AllocateStack(2, Align(2));
19164 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
19165 return false;
19166 }
19167
19168 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
19169 unsigned Offset4 = State.AllocateStack(4, Align(4));
19170 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
19171 return false;
19172 }
19173
19174 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
19175 unsigned Offset5 = State.AllocateStack(8, Align(8));
19176 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
19177 return false;
19178 }
19179
19180 if (LocVT.isVector()) {
19181 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
19182 if (AllocatedVReg) {
19183 // Fixed-length vectors are located in the corresponding scalable-vector
19184 // container types.
19185 if (ValVT.isFixedLengthVector())
19186 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19187 State.addLoc(
19188 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
19189 } else {
19190 // Try and pass the address via a "fast" GPR.
19191 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19192 LocInfo = CCValAssign::Indirect;
19193 LocVT = TLI.getSubtarget().getXLenVT();
19194 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
19195 } else if (ValVT.isFixedLengthVector()) {
19196 auto StackAlign =
19197            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19198        unsigned StackOffset =
19199 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
19200 State.addLoc(
19201 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19202 } else {
19203 // Can't pass scalable vectors on the stack.
19204 return true;
19205 }
19206 }
19207
19208 return false;
19209 }
19210
19211 return true; // CC didn't match.
19212}
19213
19214bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
19215 CCValAssign::LocInfo LocInfo,
19216 ISD::ArgFlagsTy ArgFlags, CCState &State) {
19217 if (ArgFlags.isNest()) {
19218    report_fatal_error(
19219        "Attribute 'nest' is not supported in GHC calling convention");
19220 }
19221
19222 static const MCPhysReg GPRList[] = {
19223 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
19224 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
19225
19226 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19227 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
19228 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
19229 if (unsigned Reg = State.AllocateReg(GPRList)) {
19230 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19231 return false;
19232 }
19233 }
19234
19235 const RISCVSubtarget &Subtarget =
19236      State.getMachineFunction().getSubtarget<RISCVSubtarget>();
19237
19238 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19239 // Pass in STG registers: F1, ..., F6
19240 // fs0 ... fs5
19241 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
19242 RISCV::F18_F, RISCV::F19_F,
19243 RISCV::F20_F, RISCV::F21_F};
19244 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19245 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19246 return false;
19247 }
19248 }
19249
19250 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19251 // Pass in STG registers: D1, ..., D6
19252 // fs6 ... fs11
19253 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19254 RISCV::F24_D, RISCV::F25_D,
19255 RISCV::F26_D, RISCV::F27_D};
19256 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19257 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19258 return false;
19259 }
19260 }
19261
19262 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19263 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19264 Subtarget.is64Bit())) {
19265 if (unsigned Reg = State.AllocateReg(GPRList)) {
19266 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19267 return false;
19268 }
19269 }
19270
19271 report_fatal_error("No registers left in GHC calling convention");
19272 return true;
19273}
19274
19275// Transform physical registers into virtual registers.
19276SDValue RISCVTargetLowering::LowerFormalArguments(
19277    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19278 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19279 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19280
19281  MachineFunction &MF = DAG.getMachineFunction();
19282
19283 switch (CallConv) {
19284 default:
19285 report_fatal_error("Unsupported calling convention");
19286 case CallingConv::C:
19287 case CallingConv::Fast:
19288  case CallingConv::SPIR_KERNEL:
19289  case CallingConv::GRAAL:
19290  case CallingConv::RISCV_VectorCall:
19291    break;
19292 case CallingConv::GHC:
19293 if (Subtarget.hasStdExtE())
19294 report_fatal_error("GHC calling convention is not supported on RVE!");
19295 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19296 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19297 "(Zdinx/D) instruction set extensions");
19298 }
19299
19300 const Function &Func = MF.getFunction();
19301 if (Func.hasFnAttribute("interrupt")) {
19302 if (!Func.arg_empty())
19303      report_fatal_error(
19304          "Functions with the interrupt attribute cannot have arguments!");
19305
19306 StringRef Kind =
19307 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19308
19309 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19310      report_fatal_error(
19311          "Function interrupt attribute argument not supported!");
19312 }
19313
19314 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19315 MVT XLenVT = Subtarget.getXLenVT();
19316 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19317  // Used with varargs to accumulate store chains.
19318 std::vector<SDValue> OutChains;
19319
19320 // Assign locations to all of the incoming arguments.
19321  SmallVector<CCValAssign, 16> ArgLocs;
19322  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19323
19324  if (CallConv == CallingConv::GHC)
19325    CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
19326  else
19327    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19328                     CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19329                                                   : RISCV::CC_RISCV);
19330
19331 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19332 CCValAssign &VA = ArgLocs[i];
19333 SDValue ArgValue;
19334 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19335 // case.
19336 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19337 assert(VA.needsCustom());
19338 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19339 } else if (VA.isRegLoc())
19340 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19341 else
19342 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19343
19344 if (VA.getLocInfo() == CCValAssign::Indirect) {
19345 // If the original argument was split and passed by reference (e.g. i128
19346 // on RV32), we need to load all parts of it here (using the same
19347 // address). Vectors may be partly split to registers and partly to the
19348 // stack, in which case the base address is partly offset and subsequent
19349 // stores are relative to that.
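      // For example, an i128 argument on RV32 is passed as an address held in
      // a single GPR (or stack slot); its four i32 pieces are then loaded from
      // offsets 0, 4, 8 and 12 relative to that address.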
19350 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19351                                   MachinePointerInfo()));
19352      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19353 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19354 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19355 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19356 CCValAssign &PartVA = ArgLocs[i + 1];
19357 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19358 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19359 if (PartVA.getValVT().isScalableVector())
19360 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19361 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19362 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19363                                     MachinePointerInfo()));
19364        ++i;
19365 ++InsIdx;
19366 }
19367 continue;
19368 }
19369 InVals.push_back(ArgValue);
19370 }
19371
19372 if (any_of(ArgLocs,
19373 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19374 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19375
19376 if (IsVarArg) {
19377 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19378 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19379 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19380 MachineFrameInfo &MFI = MF.getFrameInfo();
19381 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19382    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19383
19384 // Size of the vararg save area. For now, the varargs save area is either
19385 // zero or large enough to hold a0-a7.
19386 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19387 int FI;
19388
19389 // If all registers are allocated, then all varargs must be passed on the
19390 // stack and we don't need to save any argregs.
19391 if (VarArgsSaveSize == 0) {
19392 int VaArgOffset = CCInfo.getStackSize();
19393 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19394 } else {
19395 int VaArgOffset = -VarArgsSaveSize;
19396 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19397
19398 // If saving an odd number of registers then create an extra stack slot to
19399 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19400      // offsets to even-numbered registers remain 2*XLEN-aligned.
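      // For example, on RV64 with five named GPR arguments, only a5-a7 are
      // saved (24 bytes); an extra 8-byte slot keeps the save area a multiple
      // of 2*XLEN (16 bytes).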
19401 if (Idx % 2) {
19402        MFI.CreateFixedObject(
19403            XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19404 VarArgsSaveSize += XLenInBytes;
19405 }
19406
19407 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19408
19409 // Copy the integer registers that may have been used for passing varargs
19410 // to the vararg save area.
19411 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19412 const Register Reg = RegInfo.createVirtualRegister(RC);
19413 RegInfo.addLiveIn(ArgRegs[I], Reg);
19414 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19415 SDValue Store = DAG.getStore(
19416 Chain, DL, ArgValue, FIN,
19417 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19418 OutChains.push_back(Store);
19419 FIN =
19420 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19421 }
19422 }
19423
19424 // Record the frame index of the first variable argument
19425 // which is a value necessary to VASTART.
19426 RVFI->setVarArgsFrameIndex(FI);
19427 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19428 }
19429
19430 // All stores are grouped in one node to allow the matching between
19431 // the size of Ins and InVals. This only happens for vararg functions.
19432 if (!OutChains.empty()) {
19433 OutChains.push_back(Chain);
19434 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19435 }
19436
19437 return Chain;
19438}
19439
19440/// isEligibleForTailCallOptimization - Check whether the call is eligible
19441/// for tail call optimization.
19442/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19443bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19444 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19445 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19446
19447 auto CalleeCC = CLI.CallConv;
19448 auto &Outs = CLI.Outs;
19449 auto &Caller = MF.getFunction();
19450 auto CallerCC = Caller.getCallingConv();
19451
19452 // Exception-handling functions need a special set of instructions to
19453 // indicate a return to the hardware. Tail-calling another function would
19454 // probably break this.
19455 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19456 // should be expanded as new function attributes are introduced.
19457 if (Caller.hasFnAttribute("interrupt"))
19458 return false;
19459
19460 // Do not tail call opt if the stack is used to pass parameters.
19461 if (CCInfo.getStackSize() != 0)
19462 return false;
19463
19464 // Do not tail call opt if any parameters need to be passed indirectly.
19465 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19466 // passed indirectly. So the address of the value will be passed in a
19467 // register, or if not available, then the address is put on the stack. In
19468 // order to pass indirectly, space on the stack often needs to be allocated
19469  // in order to store the value. In this case the CCInfo.getStackSize()
19470  // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
19471  // are passed via CCValAssign::Indirect.
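  // For example, an i128 argument on RV32 is stored to a caller-allocated
  // slot and only its address is passed; reusing the caller's frame for a
  // tail call would invalidate that slot.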
19472 for (auto &VA : ArgLocs)
19473 if (VA.getLocInfo() == CCValAssign::Indirect)
19474 return false;
19475
19476 // Do not tail call opt if either caller or callee uses struct return
19477 // semantics.
19478 auto IsCallerStructRet = Caller.hasStructRetAttr();
19479 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19480 if (IsCallerStructRet || IsCalleeStructRet)
19481 return false;
19482
19483 // The callee has to preserve all registers the caller needs to preserve.
19484 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19485 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19486 if (CalleeCC != CallerCC) {
19487 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19488 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19489 return false;
19490 }
19491
19492 // Byval parameters hand the function a pointer directly into the stack area
19493 // we want to reuse during a tail call. Working around this *is* possible
19494 // but less efficient and uglier in LowerCall.
19495 for (auto &Arg : Outs)
19496 if (Arg.Flags.isByVal())
19497 return false;
19498
19499 return true;
19500}
19501
19503 return DAG.getDataLayout().getPrefTypeAlign(
19504 VT.getTypeForEVT(*DAG.getContext()));
19505}
19506
19507// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19508// and output parameter nodes.
19509SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19510                                       SmallVectorImpl<SDValue> &InVals) const {
19511 SelectionDAG &DAG = CLI.DAG;
19512 SDLoc &DL = CLI.DL;
19513  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19514  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19515  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19516  SDValue Chain = CLI.Chain;
19517 SDValue Callee = CLI.Callee;
19518 bool &IsTailCall = CLI.IsTailCall;
19519 CallingConv::ID CallConv = CLI.CallConv;
19520 bool IsVarArg = CLI.IsVarArg;
19521 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19522 MVT XLenVT = Subtarget.getXLenVT();
19523
19524  MachineFunction &MF = DAG.getMachineFunction();
19525
19526 // Analyze the operands of the call, assigning locations to each operand.
19527  SmallVector<CCValAssign, 16> ArgLocs;
19528  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19529
19530 if (CallConv == CallingConv::GHC) {
19531 if (Subtarget.hasStdExtE())
19532 report_fatal_error("GHC calling convention is not supported on RVE!");
19533    ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
19534  } else
19535 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19536                      CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19537                                                    : RISCV::CC_RISCV);
19538
19539 // Check if it's really possible to do a tail call.
19540 if (IsTailCall)
19541 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19542
19543 if (IsTailCall)
19544 ++NumTailCalls;
19545 else if (CLI.CB && CLI.CB->isMustTailCall())
19546 report_fatal_error("failed to perform tail call elimination on a call "
19547 "site marked musttail");
19548
19549 // Get a count of how many bytes are to be pushed on the stack.
19550 unsigned NumBytes = ArgCCInfo.getStackSize();
19551
19552 // Create local copies for byval args
19553 SmallVector<SDValue, 8> ByValArgs;
19554 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19555 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19556 if (!Flags.isByVal())
19557 continue;
19558
19559 SDValue Arg = OutVals[i];
19560 unsigned Size = Flags.getByValSize();
19561 Align Alignment = Flags.getNonZeroByValAlign();
19562
19563 int FI =
19564 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19565 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19566 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19567
19568 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19569 /*IsVolatile=*/false,
19570 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
19571                          MachinePointerInfo(), MachinePointerInfo());
19572    ByValArgs.push_back(FIPtr);
19573 }
19574
19575 if (!IsTailCall)
19576 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19577
19578 // Copy argument values to their designated locations.
19579  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19580  SmallVector<SDValue, 8> MemOpChains;
19581 SDValue StackPtr;
19582 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19583 ++i, ++OutIdx) {
19584 CCValAssign &VA = ArgLocs[i];
19585 SDValue ArgValue = OutVals[OutIdx];
19586 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19587
19588 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19589 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19590 assert(VA.isRegLoc() && "Expected register VA assignment");
19591 assert(VA.needsCustom());
19592 SDValue SplitF64 = DAG.getNode(
19593 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19594 SDValue Lo = SplitF64.getValue(0);
19595 SDValue Hi = SplitF64.getValue(1);
19596
19597 Register RegLo = VA.getLocReg();
19598 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19599
19600 // Get the CCValAssign for the Hi part.
19601 CCValAssign &HiVA = ArgLocs[++i];
19602
19603 if (HiVA.isMemLoc()) {
19604 // Second half of f64 is passed on the stack.
19605 if (!StackPtr.getNode())
19606 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19607        SDValue Address =
19608            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19609 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19610 // Emit the store.
19611 MemOpChains.push_back(
19612 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19613 } else {
19614 // Second half of f64 is passed in another GPR.
19615 Register RegHigh = HiVA.getLocReg();
19616 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19617 }
19618 continue;
19619 }
19620
19621 // Promote the value if needed.
19622 // For now, only handle fully promoted and indirect arguments.
19623 if (VA.getLocInfo() == CCValAssign::Indirect) {
19624 // Store the argument in a stack slot and pass its address.
19625 Align StackAlign =
19626 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19627 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19628 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19629 // If the original argument was split (e.g. i128), we need
19630 // to store the required parts of it here (and pass just one address).
19631 // Vectors may be partly split to registers and partly to the stack, in
19632 // which case the base address is partly offset and subsequent stores are
19633 // relative to that.
19634 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19635 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19636 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19637 // Calculate the total size to store. We don't have access to what we're
19638 // actually storing other than performing the loop and collecting the
19639 // info.
19640      SmallVector<std::pair<SDValue, SDValue>> Parts;
19641      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19642 SDValue PartValue = OutVals[OutIdx + 1];
19643 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19644 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19645 EVT PartVT = PartValue.getValueType();
19646 if (PartVT.isScalableVector())
19647 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19648 StoredSize += PartVT.getStoreSize();
19649 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19650 Parts.push_back(std::make_pair(PartValue, Offset));
19651 ++i;
19652 ++OutIdx;
19653 }
19654 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19655 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19656 MemOpChains.push_back(
19657 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19659 for (const auto &Part : Parts) {
19660 SDValue PartValue = Part.first;
19661 SDValue PartOffset = Part.second;
19662        SDValue Address =
19663            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19664 MemOpChains.push_back(
19665 DAG.getStore(Chain, DL, PartValue, Address,
19666                         MachinePointerInfo::getFixedStack(MF, FI)));
19667      }
19668 ArgValue = SpillSlot;
19669 } else {
19670 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19671 }
19672
19673 // Use local copy if it is a byval arg.
19674 if (Flags.isByVal())
19675 ArgValue = ByValArgs[j++];
19676
19677 if (VA.isRegLoc()) {
19678 // Queue up the argument copies and emit them at the end.
19679 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19680 } else {
19681 assert(VA.isMemLoc() && "Argument not register or memory");
19682 assert(!IsTailCall && "Tail call not allowed if stack is used "
19683 "for passing parameters");
19684
19685 // Work out the address of the stack slot.
19686 if (!StackPtr.getNode())
19687 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19688      SDValue Address =
19689          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19690                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
19691
19692 // Emit the store.
19693 MemOpChains.push_back(
19694 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19695 }
19696 }
19697
19698 // Join the stores, which are independent of one another.
19699 if (!MemOpChains.empty())
19700 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19701
19702 SDValue Glue;
19703
19704 // Build a sequence of copy-to-reg nodes, chained and glued together.
19705 for (auto &Reg : RegsToPass) {
19706 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19707 Glue = Chain.getValue(1);
19708 }
19709
19710 // Validate that none of the argument registers have been marked as
19711 // reserved, if so report an error. Do the same for the return address if this
19712 // is not a tailcall.
19713 validateCCReservedRegs(RegsToPass, MF);
19714 if (!IsTailCall &&
19715      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
19716    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19717        MF.getFunction(),
19718 "Return address register required, but has been reserved."});
19719
19720 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19721 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19722  // split it, so that the direct call can then be matched by PseudoCALL.
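  // For example, a direct `call foo` becomes RISCVISD::CALL with a
  // TargetGlobalAddress carrying MO_CALL, which selects to PseudoCALL and is
  // later emitted as an auipc+jalr pair (relaxable to a single jal).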
19723 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19724 const GlobalValue *GV = S->getGlobal();
19725 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19726 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19727 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19728 }
19729
19730 // The first call operand is the chain and the second is the target address.
19731  SmallVector<SDValue, 8> Ops;
19732  Ops.push_back(Chain);
19733 Ops.push_back(Callee);
19734
19735 // Add argument registers to the end of the list so that they are
19736 // known live into the call.
19737 for (auto &Reg : RegsToPass)
19738 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19739
19740 if (!IsTailCall) {
19741 // Add a register mask operand representing the call-preserved registers.
19742 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19743 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19744 assert(Mask && "Missing call preserved mask for calling convention");
19745 Ops.push_back(DAG.getRegisterMask(Mask));
19746 }
19747
19748 // Glue the call to the argument copies, if any.
19749 if (Glue.getNode())
19750 Ops.push_back(Glue);
19751
19752 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19753 "Unexpected CFI type for a direct call");
19754
19755 // Emit the call.
19756 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19757
19758 if (IsTailCall) {
19759    MF.getFrameInfo().setHasTailCall();
19760    SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19761 if (CLI.CFIType)
19762 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19763 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19764 return Ret;
19765 }
19766
19767 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19768 if (CLI.CFIType)
19769 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19770 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19771 Glue = Chain.getValue(1);
19772
19773 // Mark the end of the call, which is glued to the call itself.
19774 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19775 Glue = Chain.getValue(1);
19776
19777 // Assign locations to each value returned by this call.
19778  SmallVector<CCValAssign, 16> RVLocs;
19779  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19780 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19781
19782 // Copy all of the result registers out of their specified physreg.
19783 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19784 auto &VA = RVLocs[i];
19785 // Copy the value out
19786 SDValue RetValue =
19787 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19788 // Glue the RetValue to the end of the call sequence
19789 Chain = RetValue.getValue(1);
19790 Glue = RetValue.getValue(2);
19791
19792 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19793 assert(VA.needsCustom());
19794 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19795 MVT::i32, Glue);
19796 Chain = RetValue2.getValue(1);
19797 Glue = RetValue2.getValue(2);
19798 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19799 RetValue2);
19800 }
19801
19802 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19803
19804 InVals.push_back(RetValue);
19805 }
19806
19807 return Chain;
19808}
19809
19810bool RISCVTargetLowering::CanLowerReturn(
19811    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19812 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19813  SmallVector<CCValAssign, 16> RVLocs;
19814  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19815
19816 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19817
19818 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19819 MVT VT = Outs[i].VT;
19820 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19821 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19822 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19823 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19824 nullptr, *this, Dispatcher))
19825 return false;
19826 }
19827 return true;
19828}
19829
19830SDValue
19831RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19832                                 bool IsVarArg,
19833                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
19834                                 const SmallVectorImpl<SDValue> &OutVals,
19835 const SDLoc &DL, SelectionDAG &DAG) const {
19836  MachineFunction &MF = DAG.getMachineFunction();
19837  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19838
19839 // Stores the assignment of the return value to a location.
19840  SmallVector<CCValAssign, 16> RVLocs;
19841
19842 // Info about the registers and stack slot.
19843 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19844 *DAG.getContext());
19845
19846 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19847 nullptr, RISCV::CC_RISCV);
19848
19849 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19850 report_fatal_error("GHC functions return void only");
19851
19852 SDValue Glue;
19853 SmallVector<SDValue, 4> RetOps(1, Chain);
19854
19855 // Copy the result values into the output registers.
19856 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19857 SDValue Val = OutVals[OutIdx];
19858 CCValAssign &VA = RVLocs[i];
19859 assert(VA.isRegLoc() && "Can only return in registers!");
19860
19861 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19862 // Handle returning f64 on RV32D with a soft float ABI.
19863 assert(VA.isRegLoc() && "Expected return via registers");
19864 assert(VA.needsCustom());
19865 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19866 DAG.getVTList(MVT::i32, MVT::i32), Val);
19867 SDValue Lo = SplitF64.getValue(0);
19868 SDValue Hi = SplitF64.getValue(1);
19869 Register RegLo = VA.getLocReg();
19870 Register RegHi = RVLocs[++i].getLocReg();
19871
19872 if (STI.isRegisterReservedByUser(RegLo) ||
19873 STI.isRegisterReservedByUser(RegHi))
19874        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19875            MF.getFunction(),
19876 "Return value register required, but has been reserved."});
19877
19878 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19879 Glue = Chain.getValue(1);
19880 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19881 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19882 Glue = Chain.getValue(1);
19883 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19884 } else {
19885 // Handle a 'normal' return.
19886 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19887 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19888
19889 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19890        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19891            MF.getFunction(),
19892 "Return value register required, but has been reserved."});
19893
19894 // Guarantee that all emitted copies are stuck together.
19895 Glue = Chain.getValue(1);
19896 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19897 }
19898 }
19899
19900 RetOps[0] = Chain; // Update chain.
19901
19902 // Add the glue node if we have it.
19903 if (Glue.getNode()) {
19904 RetOps.push_back(Glue);
19905 }
19906
19907 if (any_of(RVLocs,
19908 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19909 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19910
19911 unsigned RetOpc = RISCVISD::RET_GLUE;
19912 // Interrupt service routines use different return instructions.
19913 const Function &Func = DAG.getMachineFunction().getFunction();
19914 if (Func.hasFnAttribute("interrupt")) {
19915 if (!Func.getReturnType()->isVoidTy())
19917 "Functions with the interrupt attribute must have void return type!");
19918
19920 StringRef Kind =
19921 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19922
19923 if (Kind == "supervisor")
19924 RetOpc = RISCVISD::SRET_GLUE;
19925 else
19926 RetOpc = RISCVISD::MRET_GLUE;
19927 }
19928
19929 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19930}
19931
19932void RISCVTargetLowering::validateCCReservedRegs(
19933 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19934 MachineFunction &MF) const {
19935 const Function &F = MF.getFunction();
19936 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19937
19938 if (llvm::any_of(Regs, [&STI](auto Reg) {
19939 return STI.isRegisterReservedByUser(Reg.first);
19940 }))
19941 F.getContext().diagnose(DiagnosticInfoUnsupported{
19942 F, "Argument register required, but has been reserved."});
19943}
19944
19945// Check if the result of the node is only used as a return value, as
19946// otherwise we can't perform a tail-call.
19947bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19948  if (N->getNumValues() != 1)
19949 return false;
19950 if (!N->hasNUsesOfValue(1, 0))
19951 return false;
19952
19953 SDNode *Copy = *N->use_begin();
19954
19955 if (Copy->getOpcode() == ISD::BITCAST) {
19956 return isUsedByReturnOnly(Copy, Chain);
19957 }
19958
19959 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19960 // with soft float ABIs.
19961 if (Copy->getOpcode() != ISD::CopyToReg) {
19962 return false;
19963 }
19964
19965 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19966 // isn't safe to perform a tail call.
19967 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19968 return false;
19969
19970 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19971 bool HasRet = false;
19972 for (SDNode *Node : Copy->uses()) {
19973 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19974 return false;
19975 HasRet = true;
19976 }
19977 if (!HasRet)
19978 return false;
19979
19980 Chain = Copy->getOperand(0);
19981 return true;
19982}
19983
19984bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19985  return CI->isTailCall();
19986}
19987
19988const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19989#define NODE_NAME_CASE(NODE) \
19990 case RISCVISD::NODE: \
19991 return "RISCVISD::" #NODE;
19992 // clang-format off
19993 switch ((RISCVISD::NodeType)Opcode) {
19994  case RISCVISD::FIRST_NUMBER:
19995    break;
19996 NODE_NAME_CASE(RET_GLUE)
19997 NODE_NAME_CASE(SRET_GLUE)
19998 NODE_NAME_CASE(MRET_GLUE)
19999 NODE_NAME_CASE(CALL)
20000 NODE_NAME_CASE(SELECT_CC)
20001 NODE_NAME_CASE(BR_CC)
20002 NODE_NAME_CASE(BuildPairF64)
20003 NODE_NAME_CASE(SplitF64)
20004 NODE_NAME_CASE(TAIL)
20005 NODE_NAME_CASE(ADD_LO)
20006 NODE_NAME_CASE(HI)
20007 NODE_NAME_CASE(LLA)
20008 NODE_NAME_CASE(ADD_TPREL)
20009 NODE_NAME_CASE(MULHSU)
20010 NODE_NAME_CASE(SHL_ADD)
20011 NODE_NAME_CASE(SLLW)
20012 NODE_NAME_CASE(SRAW)
20013 NODE_NAME_CASE(SRLW)
20014 NODE_NAME_CASE(DIVW)
20015 NODE_NAME_CASE(DIVUW)
20016 NODE_NAME_CASE(REMUW)
20017 NODE_NAME_CASE(ROLW)
20018 NODE_NAME_CASE(RORW)
20019 NODE_NAME_CASE(CLZW)
20020 NODE_NAME_CASE(CTZW)
20021 NODE_NAME_CASE(ABSW)
20022 NODE_NAME_CASE(FMV_H_X)
20023 NODE_NAME_CASE(FMV_X_ANYEXTH)
20024 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20025 NODE_NAME_CASE(FMV_W_X_RV64)
20026 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20027 NODE_NAME_CASE(FCVT_X)
20028 NODE_NAME_CASE(FCVT_XU)
20029 NODE_NAME_CASE(FCVT_W_RV64)
20030 NODE_NAME_CASE(FCVT_WU_RV64)
20031 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20032 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20033 NODE_NAME_CASE(FP_ROUND_BF16)
20034 NODE_NAME_CASE(FP_EXTEND_BF16)
20035 NODE_NAME_CASE(FROUND)
20036 NODE_NAME_CASE(FCLASS)
20037 NODE_NAME_CASE(FSGNJX)
20038 NODE_NAME_CASE(FMAX)
20039 NODE_NAME_CASE(FMIN)
20040 NODE_NAME_CASE(READ_COUNTER_WIDE)
20041 NODE_NAME_CASE(BREV8)
20042 NODE_NAME_CASE(ORC_B)
20043 NODE_NAME_CASE(ZIP)
20044 NODE_NAME_CASE(UNZIP)
20045 NODE_NAME_CASE(CLMUL)
20046 NODE_NAME_CASE(CLMULH)
20047 NODE_NAME_CASE(CLMULR)
20048 NODE_NAME_CASE(MOPR)
20049 NODE_NAME_CASE(MOPRR)
20050 NODE_NAME_CASE(SHA256SIG0)
20051 NODE_NAME_CASE(SHA256SIG1)
20052 NODE_NAME_CASE(SHA256SUM0)
20053 NODE_NAME_CASE(SHA256SUM1)
20054 NODE_NAME_CASE(SM4KS)
20055 NODE_NAME_CASE(SM4ED)
20056 NODE_NAME_CASE(SM3P0)
20057 NODE_NAME_CASE(SM3P1)
20058 NODE_NAME_CASE(TH_LWD)
20059 NODE_NAME_CASE(TH_LWUD)
20060 NODE_NAME_CASE(TH_LDD)
20061 NODE_NAME_CASE(TH_SWD)
20062 NODE_NAME_CASE(TH_SDD)
20063 NODE_NAME_CASE(VMV_V_V_VL)
20064 NODE_NAME_CASE(VMV_V_X_VL)
20065 NODE_NAME_CASE(VFMV_V_F_VL)
20066 NODE_NAME_CASE(VMV_X_S)
20067 NODE_NAME_CASE(VMV_S_X_VL)
20068 NODE_NAME_CASE(VFMV_S_F_VL)
20069 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20070 NODE_NAME_CASE(READ_VLENB)
20071 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20072 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20073 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20074 NODE_NAME_CASE(VSLIDEUP_VL)
20075 NODE_NAME_CASE(VSLIDE1UP_VL)
20076 NODE_NAME_CASE(VSLIDEDOWN_VL)
20077 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20078 NODE_NAME_CASE(VFSLIDE1UP_VL)
20079 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20080 NODE_NAME_CASE(VID_VL)
20081 NODE_NAME_CASE(VFNCVT_ROD_VL)
20082 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20083 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20084 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20085 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20086 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20087 NODE_NAME_CASE(VECREDUCE_AND_VL)
20088 NODE_NAME_CASE(VECREDUCE_OR_VL)
20089 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20090 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20091 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20092 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20093 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20094 NODE_NAME_CASE(ADD_VL)
20095 NODE_NAME_CASE(AND_VL)
20096 NODE_NAME_CASE(MUL_VL)
20097 NODE_NAME_CASE(OR_VL)
20098 NODE_NAME_CASE(SDIV_VL)
20099 NODE_NAME_CASE(SHL_VL)
20100 NODE_NAME_CASE(SREM_VL)
20101 NODE_NAME_CASE(SRA_VL)
20102 NODE_NAME_CASE(SRL_VL)
20103 NODE_NAME_CASE(ROTL_VL)
20104 NODE_NAME_CASE(ROTR_VL)
20105 NODE_NAME_CASE(SUB_VL)
20106 NODE_NAME_CASE(UDIV_VL)
20107 NODE_NAME_CASE(UREM_VL)
20108 NODE_NAME_CASE(XOR_VL)
20109 NODE_NAME_CASE(AVGFLOORS_VL)
20110 NODE_NAME_CASE(AVGFLOORU_VL)
20111 NODE_NAME_CASE(AVGCEILS_VL)
20112 NODE_NAME_CASE(AVGCEILU_VL)
20113 NODE_NAME_CASE(SADDSAT_VL)
20114 NODE_NAME_CASE(UADDSAT_VL)
20115 NODE_NAME_CASE(SSUBSAT_VL)
20116 NODE_NAME_CASE(USUBSAT_VL)
20117 NODE_NAME_CASE(FADD_VL)
20118 NODE_NAME_CASE(FSUB_VL)
20119 NODE_NAME_CASE(FMUL_VL)
20120 NODE_NAME_CASE(FDIV_VL)
20121 NODE_NAME_CASE(FNEG_VL)
20122 NODE_NAME_CASE(FABS_VL)
20123 NODE_NAME_CASE(FSQRT_VL)
20124 NODE_NAME_CASE(FCLASS_VL)
20125 NODE_NAME_CASE(VFMADD_VL)
20126 NODE_NAME_CASE(VFNMADD_VL)
20127 NODE_NAME_CASE(VFMSUB_VL)
20128 NODE_NAME_CASE(VFNMSUB_VL)
20129 NODE_NAME_CASE(VFWMADD_VL)
20130 NODE_NAME_CASE(VFWNMADD_VL)
20131 NODE_NAME_CASE(VFWMSUB_VL)
20132 NODE_NAME_CASE(VFWNMSUB_VL)
20133 NODE_NAME_CASE(FCOPYSIGN_VL)
20134 NODE_NAME_CASE(SMIN_VL)
20135 NODE_NAME_CASE(SMAX_VL)
20136 NODE_NAME_CASE(UMIN_VL)
20137 NODE_NAME_CASE(UMAX_VL)
20138 NODE_NAME_CASE(BITREVERSE_VL)
20139 NODE_NAME_CASE(BSWAP_VL)
20140 NODE_NAME_CASE(CTLZ_VL)
20141 NODE_NAME_CASE(CTTZ_VL)
20142 NODE_NAME_CASE(CTPOP_VL)
20143 NODE_NAME_CASE(VFMIN_VL)
20144 NODE_NAME_CASE(VFMAX_VL)
20145 NODE_NAME_CASE(MULHS_VL)
20146 NODE_NAME_CASE(MULHU_VL)
20147 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20148 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20149 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20150 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20151 NODE_NAME_CASE(VFCVT_X_F_VL)
20152 NODE_NAME_CASE(VFCVT_XU_F_VL)
20153 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20154 NODE_NAME_CASE(SINT_TO_FP_VL)
20155 NODE_NAME_CASE(UINT_TO_FP_VL)
20156 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20157 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20158 NODE_NAME_CASE(FP_EXTEND_VL)
20159 NODE_NAME_CASE(FP_ROUND_VL)
20160 NODE_NAME_CASE(STRICT_FADD_VL)
20161 NODE_NAME_CASE(STRICT_FSUB_VL)
20162 NODE_NAME_CASE(STRICT_FMUL_VL)
20163 NODE_NAME_CASE(STRICT_FDIV_VL)
20164 NODE_NAME_CASE(STRICT_FSQRT_VL)
20165 NODE_NAME_CASE(STRICT_VFMADD_VL)
20166 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20167 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20168 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20169 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20170 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20171 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20172 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20173 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20174 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20175 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20176 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20177 NODE_NAME_CASE(STRICT_FSETCC_VL)
20178 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20179 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20180 NODE_NAME_CASE(VWMUL_VL)
20181 NODE_NAME_CASE(VWMULU_VL)
20182 NODE_NAME_CASE(VWMULSU_VL)
20183 NODE_NAME_CASE(VWADD_VL)
20184 NODE_NAME_CASE(VWADDU_VL)
20185 NODE_NAME_CASE(VWSUB_VL)
20186 NODE_NAME_CASE(VWSUBU_VL)
20187 NODE_NAME_CASE(VWADD_W_VL)
20188 NODE_NAME_CASE(VWADDU_W_VL)
20189 NODE_NAME_CASE(VWSUB_W_VL)
20190 NODE_NAME_CASE(VWSUBU_W_VL)
20191 NODE_NAME_CASE(VWSLL_VL)
20192 NODE_NAME_CASE(VFWMUL_VL)
20193 NODE_NAME_CASE(VFWADD_VL)
20194 NODE_NAME_CASE(VFWSUB_VL)
20195 NODE_NAME_CASE(VFWADD_W_VL)
20196 NODE_NAME_CASE(VFWSUB_W_VL)
20197 NODE_NAME_CASE(VWMACC_VL)
20198 NODE_NAME_CASE(VWMACCU_VL)
20199 NODE_NAME_CASE(VWMACCSU_VL)
20200 NODE_NAME_CASE(VNSRL_VL)
20201 NODE_NAME_CASE(SETCC_VL)
20202 NODE_NAME_CASE(VMERGE_VL)
20203 NODE_NAME_CASE(VMAND_VL)
20204 NODE_NAME_CASE(VMOR_VL)
20205 NODE_NAME_CASE(VMXOR_VL)
20206 NODE_NAME_CASE(VMCLR_VL)
20207 NODE_NAME_CASE(VMSET_VL)
20208 NODE_NAME_CASE(VRGATHER_VX_VL)
20209 NODE_NAME_CASE(VRGATHER_VV_VL)
20210 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20211 NODE_NAME_CASE(VSEXT_VL)
20212 NODE_NAME_CASE(VZEXT_VL)
20213 NODE_NAME_CASE(VCPOP_VL)
20214 NODE_NAME_CASE(VFIRST_VL)
20215 NODE_NAME_CASE(READ_CSR)
20216 NODE_NAME_CASE(WRITE_CSR)
20217 NODE_NAME_CASE(SWAP_CSR)
20218 NODE_NAME_CASE(CZERO_EQZ)
20219 NODE_NAME_CASE(CZERO_NEZ)
20220 NODE_NAME_CASE(SW_GUARDED_BRIND)
20221 NODE_NAME_CASE(SF_VC_XV_SE)
20222 NODE_NAME_CASE(SF_VC_IV_SE)
20223 NODE_NAME_CASE(SF_VC_VV_SE)
20224 NODE_NAME_CASE(SF_VC_FV_SE)
20225 NODE_NAME_CASE(SF_VC_XVV_SE)
20226 NODE_NAME_CASE(SF_VC_IVV_SE)
20227 NODE_NAME_CASE(SF_VC_VVV_SE)
20228 NODE_NAME_CASE(SF_VC_FVV_SE)
20229 NODE_NAME_CASE(SF_VC_XVW_SE)
20230 NODE_NAME_CASE(SF_VC_IVW_SE)
20231 NODE_NAME_CASE(SF_VC_VVW_SE)
20232 NODE_NAME_CASE(SF_VC_FVW_SE)
20233 NODE_NAME_CASE(SF_VC_V_X_SE)
20234 NODE_NAME_CASE(SF_VC_V_I_SE)
20235 NODE_NAME_CASE(SF_VC_V_XV_SE)
20236 NODE_NAME_CASE(SF_VC_V_IV_SE)
20237 NODE_NAME_CASE(SF_VC_V_VV_SE)
20238 NODE_NAME_CASE(SF_VC_V_FV_SE)
20239 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20240 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20241 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20242 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20243 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20244 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20245 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20246 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20247 }
20248 // clang-format on
20249 return nullptr;
20250#undef NODE_NAME_CASE
20251}
20252
20253/// getConstraintType - Given a constraint letter, return the type of
20254/// constraint it is for this target.
20255RISCVTargetLowering::ConstraintType
20256RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20257  if (Constraint.size() == 1) {
20258 switch (Constraint[0]) {
20259 default:
20260 break;
20261 case 'f':
20262 return C_RegisterClass;
20263 case 'I':
20264 case 'J':
20265 case 'K':
20266 return C_Immediate;
20267 case 'A':
20268 return C_Memory;
20269 case 's':
20270 case 'S': // A symbolic address
20271 return C_Other;
20272 }
20273 } else {
20274 if (Constraint == "vr" || Constraint == "vm")
20275 return C_RegisterClass;
20276 }
20277 return TargetLowering::getConstraintType(Constraint);
20278}
20279
20280std::pair<unsigned, const TargetRegisterClass *>
20281RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20282                                                  StringRef Constraint,
20283 MVT VT) const {
20284 // First, see if this is a constraint that directly corresponds to a RISC-V
20285 // register class.
20286 if (Constraint.size() == 1) {
20287 switch (Constraint[0]) {
20288 case 'r':
20289 // TODO: Support fixed vectors up to XLen for P extension?
20290 if (VT.isVector())
20291 break;
20292 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20293 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20294 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20295 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20296 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20297 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20298 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20299 case 'f':
20300 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20301 return std::make_pair(0U, &RISCV::FPR16RegClass);
20302 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20303 return std::make_pair(0U, &RISCV::FPR32RegClass);
20304 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20305 return std::make_pair(0U, &RISCV::FPR64RegClass);
20306 break;
20307 default:
20308 break;
20309 }
20310 } else if (Constraint == "vr") {
20311 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20312 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20313 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20314 return std::make_pair(0U, RC);
20315 }
20316 } else if (Constraint == "vm") {
20317 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20318 return std::make_pair(0U, &RISCV::VMV0RegClass);
20319 }
20320
20321 // Clang will correctly decode the usage of register name aliases into their
20322 // official names. However, other frontends like `rustc` do not. This allows
20323 // users of these frontends to use the ABI names for registers in LLVM-style
20324 // register constraints.
20325 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20326 .Case("{zero}", RISCV::X0)
20327 .Case("{ra}", RISCV::X1)
20328 .Case("{sp}", RISCV::X2)
20329 .Case("{gp}", RISCV::X3)
20330 .Case("{tp}", RISCV::X4)
20331 .Case("{t0}", RISCV::X5)
20332 .Case("{t1}", RISCV::X6)
20333 .Case("{t2}", RISCV::X7)
20334 .Cases("{s0}", "{fp}", RISCV::X8)
20335 .Case("{s1}", RISCV::X9)
20336 .Case("{a0}", RISCV::X10)
20337 .Case("{a1}", RISCV::X11)
20338 .Case("{a2}", RISCV::X12)
20339 .Case("{a3}", RISCV::X13)
20340 .Case("{a4}", RISCV::X14)
20341 .Case("{a5}", RISCV::X15)
20342 .Case("{a6}", RISCV::X16)
20343 .Case("{a7}", RISCV::X17)
20344 .Case("{s2}", RISCV::X18)
20345 .Case("{s3}", RISCV::X19)
20346 .Case("{s4}", RISCV::X20)
20347 .Case("{s5}", RISCV::X21)
20348 .Case("{s6}", RISCV::X22)
20349 .Case("{s7}", RISCV::X23)
20350 .Case("{s8}", RISCV::X24)
20351 .Case("{s9}", RISCV::X25)
20352 .Case("{s10}", RISCV::X26)
20353 .Case("{s11}", RISCV::X27)
20354 .Case("{t3}", RISCV::X28)
20355 .Case("{t4}", RISCV::X29)
20356 .Case("{t5}", RISCV::X30)
20357 .Case("{t6}", RISCV::X31)
20358 .Default(RISCV::NoRegister);
20359 if (XRegFromAlias != RISCV::NoRegister)
20360 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20361
20362 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20363 // TableGen record rather than the AsmName to choose registers for InlineAsm
20364 // constraints, and since we want to match those names to the widest floating
20365 // point register type available, manually select floating point registers here.
20366 //
20367 // The second case is the ABI name of the register, so that frontends can also
20368 // use the ABI names in register constraint lists.
20369 if (Subtarget.hasStdExtF()) {
20370 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20371 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20372 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20373 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20374 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20375 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20376 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20377 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20378 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20379 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20380 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20381 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20382 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20383 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20384 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20385 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20386 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20387 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20388 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20389 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20390 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20391 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20392 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20393 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20394 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20395 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20396 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20397 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20398 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20399 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20400 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20401 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20402 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20403 .Default(RISCV::NoRegister);
20404 if (FReg != RISCV::NoRegister) {
20405 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20406 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20407 unsigned RegNo = FReg - RISCV::F0_F;
20408 unsigned DReg = RISCV::F0_D + RegNo;
20409 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20410 }
20411 if (VT == MVT::f32 || VT == MVT::Other)
20412 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20413 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20414 unsigned RegNo = FReg - RISCV::F0_F;
20415 unsigned HReg = RISCV::F0_H + RegNo;
20416 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20417 }
20418 }
20419 }
20420
20421 if (Subtarget.hasVInstructions()) {
20422 Register VReg = StringSwitch<Register>(Constraint.lower())
20423 .Case("{v0}", RISCV::V0)
20424 .Case("{v1}", RISCV::V1)
20425 .Case("{v2}", RISCV::V2)
20426 .Case("{v3}", RISCV::V3)
20427 .Case("{v4}", RISCV::V4)
20428 .Case("{v5}", RISCV::V5)
20429 .Case("{v6}", RISCV::V6)
20430 .Case("{v7}", RISCV::V7)
20431 .Case("{v8}", RISCV::V8)
20432 .Case("{v9}", RISCV::V9)
20433 .Case("{v10}", RISCV::V10)
20434 .Case("{v11}", RISCV::V11)
20435 .Case("{v12}", RISCV::V12)
20436 .Case("{v13}", RISCV::V13)
20437 .Case("{v14}", RISCV::V14)
20438 .Case("{v15}", RISCV::V15)
20439 .Case("{v16}", RISCV::V16)
20440 .Case("{v17}", RISCV::V17)
20441 .Case("{v18}", RISCV::V18)
20442 .Case("{v19}", RISCV::V19)
20443 .Case("{v20}", RISCV::V20)
20444 .Case("{v21}", RISCV::V21)
20445 .Case("{v22}", RISCV::V22)
20446 .Case("{v23}", RISCV::V23)
20447 .Case("{v24}", RISCV::V24)
20448 .Case("{v25}", RISCV::V25)
20449 .Case("{v26}", RISCV::V26)
20450 .Case("{v27}", RISCV::V27)
20451 .Case("{v28}", RISCV::V28)
20452 .Case("{v29}", RISCV::V29)
20453 .Case("{v30}", RISCV::V30)
20454 .Case("{v31}", RISCV::V31)
20455 .Default(RISCV::NoRegister);
20456 if (VReg != RISCV::NoRegister) {
20457 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20458 return std::make_pair(VReg, &RISCV::VMRegClass);
20459 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20460 return std::make_pair(VReg, &RISCV::VRRegClass);
20461 for (const auto *RC :
20462 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20463 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20464 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20465 return std::make_pair(VReg, RC);
20466 }
20467 }
20468 }
20469 }
20470
20471 std::pair<Register, const TargetRegisterClass *> Res =
20473
20474 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20475 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20476 // Subtarget into account.
20477 if (Res.second == &RISCV::GPRF16RegClass ||
20478 Res.second == &RISCV::GPRF32RegClass ||
20479 Res.second == &RISCV::GPRPairRegClass)
20480 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20481
20482 return Res;
20483}
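A small sketch (hypothetical, not from this file) of why the {abi-name} aliases above are accepted: explicit register variables in C lower to constraints such as "{a0}" and "{a7}", which non-Clang frontends may also emit directly.

  long ecall1(long nr, long arg0) {
    register long a7 asm("a7") = nr;   // becomes the "{a7}" constraint in IR
    register long a0 asm("a0") = arg0; // becomes the "{a0}" constraint in IR
    asm volatile("ecall" : "+r"(a0) : "r"(a7) : "memory");
    return a0;
  }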
20484
20485InlineAsm::ConstraintCode
20486RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20487 // Currently only support length 1 constraints.
20488 if (ConstraintCode.size() == 1) {
20489 switch (ConstraintCode[0]) {
20490 case 'A':
20491 return InlineAsm::ConstraintCode::A;
20492 default:
20493 break;
20494 }
20495 }
20496
20497 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20498}
20499
20500void RISCVTargetLowering::LowerAsmOperandForConstraint(
20501 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20502 SelectionDAG &DAG) const {
20503 // Currently only support length 1 constraints.
20504 if (Constraint.size() == 1) {
20505 switch (Constraint[0]) {
20506 case 'I':
20507 // Validate & create a 12-bit signed immediate operand.
20508 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20509 uint64_t CVal = C->getSExtValue();
20510 if (isInt<12>(CVal))
20511 Ops.push_back(DAG.getSignedConstant(
20512 CVal, SDLoc(Op), Subtarget.getXLenVT(), /*isTarget=*/true));
20513 }
20514 return;
20515 case 'J':
20516 // Validate & create an integer zero operand.
20517 if (isNullConstant(Op))
20518 Ops.push_back(
20519 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20520 return;
20521 case 'K':
20522 // Validate & create a 5-bit unsigned immediate operand.
20523 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20524 uint64_t CVal = C->getZExtValue();
20525 if (isUInt<5>(CVal))
20526 Ops.push_back(
20527 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20528 }
20529 return;
20530 case 'S':
20531 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
20532 return;
20533 default:
20534 break;
20535 }
20536 }
20537 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20538}
20539
20540Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20541 Instruction *Inst,
20542 AtomicOrdering Ord) const {
20543 if (Subtarget.hasStdExtZtso()) {
20544 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20545 return Builder.CreateFence(Ord);
20546 return nullptr;
20547 }
20548
20549 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20550 return Builder.CreateFence(Ord);
20551 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20552 return Builder.CreateFence(AtomicOrdering::Release);
20553 return nullptr;
20554}
20555
20556Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20557 Instruction *Inst,
20558 AtomicOrdering Ord) const {
20559 if (Subtarget.hasStdExtZtso()) {
20560 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20561 return Builder.CreateFence(Ord);
20562 return nullptr;
20563 }
20564
20565 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20566 return Builder.CreateFence(AtomicOrdering::Acquire);
20567 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
20568 Ord == AtomicOrdering::SequentiallyConsistent)
20569 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20570 return nullptr;
20571}
20572
20573TargetLowering::AtomicExpansionKind
20574RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20575 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20576 // point operations can't be used in an lr/sc sequence without breaking the
20577 // forward-progress guarantee.
20578 if (AI->isFloatingPointOperation() ||
20579 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20580 AI->getOperation() == AtomicRMWInst::UDecWrap)
20581 return AtomicExpansionKind::CmpXChg;
20582
20583 // Don't expand forced atomics, we want to have __sync libcalls instead.
20584 if (Subtarget.hasForcedAtomics())
20585 return AtomicExpansionKind::None;
20586
20587 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20588 if (AI->getOperation() == AtomicRMWInst::Nand) {
20589 if (Subtarget.hasStdExtZacas() &&
20590 (Size >= 32 || Subtarget.hasStdExtZabha()))
20591 return AtomicExpansionKind::CmpXChg;
20592 if (Size < 32)
20593 return AtomicExpansionKind::MaskedIntrinsic;
20594 }
20595
20596 if (Size < 32 && !Subtarget.hasStdExtZabha())
20597 return AtomicExpansionKind::MaskedIntrinsic;
20598
20599 return AtomicExpansionKind::None;
20600}
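As a rough illustration (assuming a target without Zabha), a sub-word RMW like the following is the kind of operation that takes the MaskedIntrinsic path chosen above and is widened to an LR/SC loop over the containing aligned word:

  #include <stdatomic.h>

  unsigned char fetch_add_u8(_Atomic unsigned char *p) {
    // 8-bit atomicrmw add; expanded via the masked atomicrmw intrinsics below.
    return atomic_fetch_add_explicit(p, 1, memory_order_seq_cst);
  }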
20601
20602static Intrinsic::ID
20603getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20604 if (XLen == 32) {
20605 switch (BinOp) {
20606 default:
20607 llvm_unreachable("Unexpected AtomicRMW BinOp");
20608 case AtomicRMWInst::Xchg:
20609 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20610 case AtomicRMWInst::Add:
20611 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20612 case AtomicRMWInst::Sub:
20613 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20614 case AtomicRMWInst::Nand:
20615 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20616 case AtomicRMWInst::Max:
20617 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20618 case AtomicRMWInst::Min:
20619 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20620 case AtomicRMWInst::UMax:
20621 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20622 case AtomicRMWInst::UMin:
20623 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20624 }
20625 }
20626
20627 if (XLen == 64) {
20628 switch (BinOp) {
20629 default:
20630 llvm_unreachable("Unexpected AtomicRMW BinOp");
20631 case AtomicRMWInst::Xchg:
20632 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20633 case AtomicRMWInst::Add:
20634 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20635 case AtomicRMWInst::Sub:
20636 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20637 case AtomicRMWInst::Nand:
20638 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20639 case AtomicRMWInst::Max:
20640 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20641 case AtomicRMWInst::Min:
20642 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20643 case AtomicRMWInst::UMax:
20644 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20645 case AtomicRMWInst::UMin:
20646 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20647 }
20648 }
20649
20650 llvm_unreachable("Unexpected XLen\n");
20651}
20652
20653Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20654 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20655 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20656 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20657 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20658 // mask, as this produces better code than the LR/SC loop emitted by
20659 // int_riscv_masked_atomicrmw_xchg.
20660 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20661 isa<ConstantInt>(AI->getValOperand())) {
20662 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20663 if (CVal->isZero())
20664 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20665 Builder.CreateNot(Mask, "Inv_Mask"),
20666 AI->getAlign(), Ord);
20667 if (CVal->isMinusOne())
20668 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20669 AI->getAlign(), Ord);
20670 }
20671
20672 unsigned XLen = Subtarget.getXLen();
20673 Value *Ordering =
20674 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20675 Type *Tys[] = {AlignedAddr->getType()};
20676 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20677 AI->getModule(),
20678 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20679
20680 if (XLen == 64) {
20681 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20682 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20683 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20684 }
20685
20686 Value *Result;
20687
20688 // Must pass the shift amount needed to sign extend the loaded value prior
20689 // to performing a signed comparison for min/max. ShiftAmt is the number of
20690 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20691 // is the number of bits to left+right shift the value in order to
20692 // sign-extend.
20693 if (AI->getOperation() == AtomicRMWInst::Min ||
20694 AI->getOperation() == AtomicRMWInst::Max) {
20695 const DataLayout &DL = AI->getDataLayout();
20696 unsigned ValWidth =
20697 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20698 Value *SextShamt =
20699 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20700 Result = Builder.CreateCall(LrwOpScwLoop,
20701 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20702 } else {
20703 Result =
20704 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20705 }
20706
20707 if (XLen == 64)
20708 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20709 return Result;
20710}
20711
20712TargetLowering::AtomicExpansionKind
20713RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20714 AtomicCmpXchgInst *CI) const {
20715 // Don't expand forced atomics, we want to have __sync libcalls instead.
20716 if (Subtarget.hasForcedAtomics())
20717 return AtomicExpansionKind::None;
20718
20719 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20720 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20721 (Size == 8 || Size == 16))
20722 return AtomicExpansionKind::MaskedIntrinsic;
20723 return AtomicExpansionKind::None;
20724}
20725
20726Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20727 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20728 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20729 unsigned XLen = Subtarget.getXLen();
20730 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20731 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20732 if (XLen == 64) {
20733 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20734 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20735 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20736 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20737 }
20738 Type *Tys[] = {AlignedAddr->getType()};
20739 Function *MaskedCmpXchg =
20740 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20741 Value *Result = Builder.CreateCall(
20742 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20743 if (XLen == 64)
20744 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20745 return Result;
20746}
20747
20749 EVT DataVT) const {
20750 // We have indexed loads for all supported EEW types. Indices are always
20751 // zero extended.
20752 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20753 isTypeLegal(Extend.getValueType()) &&
20754 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20755 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20756}
20757
20759 EVT VT) const {
20760 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20761 return false;
20762
20763 switch (FPVT.getSimpleVT().SimpleTy) {
20764 case MVT::f16:
20765 return Subtarget.hasStdExtZfhmin();
20766 case MVT::f32:
20767 return Subtarget.hasStdExtF();
20768 case MVT::f64:
20769 return Subtarget.hasStdExtD();
20770 default:
20771 return false;
20772 }
20773}
20774
20776 // If we are using the small code model, we can reduce the size of jump table
20777 // entries to 4 bytes.
20778 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20781 }
20783}
20784
20786 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20787 unsigned uid, MCContext &Ctx) const {
20788 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20790 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20791}
20792
20794 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20795 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20796 // a power of two as well.
20797 // FIXME: This doesn't work for zve32, but that's already broken
20798 // elsewhere for the same reason.
20799 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20800 static_assert(RISCV::RVVBitsPerBlock == 64,
20801 "RVVBitsPerBlock changed, audit needed");
20802 return true;
20803}
20804
20806 SDValue &Offset,
20808 SelectionDAG &DAG) const {
20809 // Target does not support indexed loads.
20810 if (!Subtarget.hasVendorXTHeadMemIdx())
20811 return false;
20812
20813 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20814 return false;
20815
20816 Base = Op->getOperand(0);
20817 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20818 int64_t RHSC = RHS->getSExtValue();
20819 if (Op->getOpcode() == ISD::SUB)
20820 RHSC = -(uint64_t)RHSC;
20821
20822 // The constants that can be encoded in the THeadMemIdx instructions
20823 // are of the form (sign_extend(imm5) << imm2).
20824 bool isLegalIndexedOffset = false;
20825 for (unsigned i = 0; i < 4; i++)
20826 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20827 isLegalIndexedOffset = true;
20828 break;
20829 }
20830
20831 if (!isLegalIndexedOffset)
20832 return false;
20833
20834 Offset = Op->getOperand(1);
20835 return true;
20836 }
20837
20838 return false;
20839}
20840
20842 SDValue &Offset,
20844 SelectionDAG &DAG) const {
20845 EVT VT;
20846 SDValue Ptr;
20847 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20848 VT = LD->getMemoryVT();
20849 Ptr = LD->getBasePtr();
20850 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20851 VT = ST->getMemoryVT();
20852 Ptr = ST->getBasePtr();
20853 } else
20854 return false;
20855
20856 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20857 return false;
20858
20859 AM = ISD::PRE_INC;
20860 return true;
20861}
20862
20864 SDValue &Base,
20865 SDValue &Offset,
20867 SelectionDAG &DAG) const {
20868 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
20869 if (Op->getOpcode() != ISD::ADD)
20870 return false;
20871
20872 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
20873 Base = LS->getBasePtr();
20874 else
20875 return false;
20876
20877 if (Base == Op->getOperand(0))
20878 Offset = Op->getOperand(1);
20879 else if (Base == Op->getOperand(1))
20880 Offset = Op->getOperand(0);
20881 else
20882 return false;
20883
20884 AM = ISD::POST_INC;
20885 return true;
20886 }
20887
20888 EVT VT;
20889 SDValue Ptr;
20890 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20891 VT = LD->getMemoryVT();
20892 Ptr = LD->getBasePtr();
20893 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20894 VT = ST->getMemoryVT();
20895 Ptr = ST->getBasePtr();
20896 } else
20897 return false;
20898
20899 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20900 return false;
20901 // Post-indexing updates the base, so it's not a valid transform
20902 // if that's not the same as the load's pointer.
20903 if (Ptr != Base)
20904 return false;
20905
20906 AM = ISD::POST_INC;
20907 return true;
20908}
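For context, a sketch of the addressing pattern the pre/post-indexed hooks above look for (assuming XTHeadMemIdx, or XCVmem on RV32): the pointer update in a loop like this can be folded into the load itself.

  long sum(const long *p, int n) {
    long s = 0;
    for (int i = 0; i < n; ++i)
      s += *p++; // load plus pointer increment, a post-indexed candidate
    return s;
  }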
20909
20911 EVT VT) const {
20912 EVT SVT = VT.getScalarType();
20913
20914 if (!SVT.isSimple())
20915 return false;
20916
20917 switch (SVT.getSimpleVT().SimpleTy) {
20918 case MVT::f16:
20919 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20920 : Subtarget.hasStdExtZfhOrZhinx();
20921 case MVT::f32:
20922 return Subtarget.hasStdExtFOrZfinx();
20923 case MVT::f64:
20924 return Subtarget.hasStdExtDOrZdinx();
20925 default:
20926 break;
20927 }
20928
20929 return false;
20930}
20931
20933 // Zacas will use amocas.w which does not require extension.
20934 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20935}
20936
20938 const Constant *PersonalityFn) const {
20939 return RISCV::X10;
20940}
20941
20943 const Constant *PersonalityFn) const {
20944 return RISCV::X11;
20945}
20946
20948 // Return false to suppress the unnecessary extensions if the LibCall
20949 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20950 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20951 Type.getSizeInBits() < Subtarget.getXLen()))
20952 return false;
20953
20954 return true;
20955}
20956
20958 if (Subtarget.is64Bit() && Type == MVT::i32)
20959 return true;
20960
20961 return IsSigned;
20962}
20963
20965 SDValue C) const {
20966 // Check integral scalar types.
20967 if (!VT.isScalarInteger())
20968 return false;
20969
20970 // Omit the optimization if the subtarget has the M (or Zmmul) extension and
20971 // the data size exceeds XLen.
20972 const bool HasZmmul = Subtarget.hasStdExtZmmul();
20973 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20974 return false;
20975
20976 auto *ConstNode = cast<ConstantSDNode>(C);
20977 const APInt &Imm = ConstNode->getAPIntValue();
20978
20979 // Break the MUL to a SLLI and an ADD/SUB.
20980 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20981 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20982 return true;
20983
20984 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20985 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20986 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20987 (Imm - 8).isPowerOf2()))
20988 return true;
20989
20990 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20991 // a pair of LUI/ADDI.
20992 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20993 ConstNode->hasOneUse()) {
20994 APInt ImmS = Imm.ashr(Imm.countr_zero());
20995 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20996 (1 - ImmS).isPowerOf2())
20997 return true;
20998 }
20999
21000 return false;
21001}
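A sketch of the strength reduction this hook is gating (illustrative only): multiplications by constants close to a power of two are usually cheaper as shifts and adds than as a mul.

  long times5(long x) {
    // Typically x + (x << 2): sh2add with Zba, or slli + add without it.
    return x * 5;
  }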
21002
21004 SDValue ConstNode) const {
21005 // Let the DAGCombiner decide for vectors.
21006 EVT VT = AddNode.getValueType();
21007 if (VT.isVector())
21008 return true;
21009
21010 // Let the DAGCombiner decide for larger types.
21011 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21012 return true;
21013
21014 // It is worse if c1 is simm12 while c1*c2 is not.
21015 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21016 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21017 const APInt &C1 = C1Node->getAPIntValue();
21018 const APInt &C2 = C2Node->getAPIntValue();
21019 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21020 return false;
21021
21022 // Default to true and let the DAGCombiner decide.
21023 return true;
21024}
21025
21027 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21028 unsigned *Fast) const {
21029 if (!VT.isVector()) {
21030 if (Fast)
21031 *Fast = Subtarget.enableUnalignedScalarMem();
21032 return Subtarget.enableUnalignedScalarMem();
21033 }
21034
21035 // All vector implementations must support element alignment
21036 EVT ElemVT = VT.getVectorElementType();
21037 if (Alignment >= ElemVT.getStoreSize()) {
21038 if (Fast)
21039 *Fast = 1;
21040 return true;
21041 }
21042
21043 // Note: We lower an unmasked unaligned vector access to an equally sized
21044 // e8 element type access. Given this, we effectively support all unmasked
21045 // misaligned accesses. TODO: Work through the codegen implications of
21046 // allowing such accesses to be formed, and considered fast.
21047 if (Fast)
21048 *Fast = Subtarget.enableUnalignedVectorMem();
21049 return Subtarget.enableUnalignedVectorMem();
21050}
21051
21052
21054 const AttributeList &FuncAttributes) const {
21055 if (!Subtarget.hasVInstructions())
21056 return MVT::Other;
21057
21058 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21059 return MVT::Other;
21060
21061 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21062 // has an expansion threshold, and we want the number of hardware memory
21063 // operations to correspond roughly to that threshold. LMUL>1 operations
21064 // are typically expanded linearly internally, and thus correspond to more
21065 // than one actual memory operation. Note that store merging and load
21066 // combining will typically form larger LMUL operations from the LMUL1
21067 // operations emitted here, and that's okay because combining isn't
21068 // introducing new memory operations; it's just merging existing ones.
21069 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21070 if (Op.size() < MinVLenInBytes)
21071 // TODO: Figure out short memops. For the moment, do the default thing
21072 // which ends up using scalar sequences.
21073 return MVT::Other;
21074
21075 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
21076 // fixed vectors.
21077 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
21078 return MVT::Other;
21079
21080 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21081 // a large scalar constant and instead use vmv.v.x/i to do the
21082 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21083 // maximize the chance we can encode the size in the vsetvli.
21084 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21085 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21086
21087 // Do we have sufficient alignment for our preferred VT? If not, revert
21088 // to largest size allowed by our alignment criteria.
21089 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21090 Align RequiredAlign(PreferredVT.getStoreSize());
21091 if (Op.isFixedDstAlign())
21092 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21093 if (Op.isMemcpy())
21094 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21095 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21096 }
21097 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21098}
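As an illustration (assuming V is enabled and VLEN is at least 128), a fixed-size copy like the one below is a candidate for the LMUL1 vector memory operations described in the comments above, rather than a scalar load/store sequence:

  #include <string.h>

  void copy64(void *dst, const void *src) {
    memcpy(dst, src, 64); // large enough to reach the expansion threshold
  }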
21099
21101 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21102 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21103 bool IsABIRegCopy = CC.has_value();
21104 EVT ValueVT = Val.getValueType();
21105 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21106 PartVT == MVT::f32) {
21107 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
21108 // nan, and cast to f32.
21109 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
21110 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
21111 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
21112 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
21113 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
21114 Parts[0] = Val;
21115 return true;
21116 }
21117
21118 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21119 LLVMContext &Context = *DAG.getContext();
21120 EVT ValueEltVT = ValueVT.getVectorElementType();
21121 EVT PartEltVT = PartVT.getVectorElementType();
21122 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21123 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21124 if (PartVTBitSize % ValueVTBitSize == 0) {
21125 assert(PartVTBitSize >= ValueVTBitSize);
21126 // If the element types are different, bitcast to the same element type of
21127 // PartVT first.
21128 // For example, say we want to copy a <vscale x 1 x i8> value to
21129 // <vscale x 4 x i16>.
21130 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
21131 // subvector, then we can bitcast to <vscale x 4 x i16>.
21132 if (ValueEltVT != PartEltVT) {
21133 if (PartVTBitSize > ValueVTBitSize) {
21134 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21135 assert(Count != 0 && "The number of elements should not be zero.");
21136 EVT SameEltTypeVT =
21137 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21138 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
21139 DAG.getUNDEF(SameEltTypeVT), Val,
21140 DAG.getVectorIdxConstant(0, DL));
21141 }
21142 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
21143 } else {
21144 Val =
21145 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
21146 Val, DAG.getVectorIdxConstant(0, DL));
21147 }
21148 Parts[0] = Val;
21149 return true;
21150 }
21151 }
21152 return false;
21153}
21154
21156 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
21157 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
21158 bool IsABIRegCopy = CC.has_value();
21159 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
21160 PartVT == MVT::f32) {
21161 SDValue Val = Parts[0];
21162
21163 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
21164 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
21165 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
21166 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
21167 return Val;
21168 }
21169
21170 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
21171 LLVMContext &Context = *DAG.getContext();
21172 SDValue Val = Parts[0];
21173 EVT ValueEltVT = ValueVT.getVectorElementType();
21174 EVT PartEltVT = PartVT.getVectorElementType();
21175 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
21176 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21177 if (PartVTBitSize % ValueVTBitSize == 0) {
21178 assert(PartVTBitSize >= ValueVTBitSize);
21179 EVT SameEltTypeVT = ValueVT;
21180 // If the element types are different, convert it to the same element type
21181 // of PartVT.
21182 // For example, say we want to copy a <vscale x 1 x i8> value out of a
21183 // <vscale x 4 x i16>.
21184 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
21185 // then we can extract <vscale x 1 x i8>.
21186 if (ValueEltVT != PartEltVT) {
21187 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21188 assert(Count != 0 && "The number of elements should not be zero.");
21189 SameEltTypeVT =
21190 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21191 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21192 }
21193 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21194 DAG.getVectorIdxConstant(0, DL));
21195 return Val;
21196 }
21197 }
21198 return SDValue();
21199}
21200
21202 // When aggressively optimizing for code size, we prefer to use a div
21203 // instruction, as it is usually smaller than the alternative sequence.
21204 // TODO: Add vector division?
21205 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21206 return OptSize && !VT.isVector();
21207}
21208
21210 // Scalarizing zero_extend and sign_extend might prevent them from matching
21211 // widening instructions in some situations.
21212 unsigned Opc = N->getOpcode();
21213 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21214 return false;
21215 return true;
21216}
21217
21218static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21219 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21220 Function *ThreadPointerFunc =
21221 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21222 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21223 IRB.CreateCall(ThreadPointerFunc), Offset);
21224}
21225
21227 // Fuchsia provides a fixed TLS slot for the stack cookie.
21228 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21229 if (Subtarget.isTargetFuchsia())
21230 return useTpOffset(IRB, -0x10);
21231
21232 // Android provides a fixed TLS slot for the stack cookie. See the definition
21233 // of TLS_SLOT_STACK_GUARD in
21234 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
21235 if (Subtarget.isTargetAndroid())
21236 return useTpOffset(IRB, -0x18);
21237
21238 return TargetLowering::getIRStackGuard(IRB);
21239}
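A brief sketch of where the tp-relative stack-guard slot above comes into play: on Fuchsia or Android, any function instrumented by the stack protector (for example, under -fstack-protector-strong) loads its cookie from that fixed offset off tp rather than from a global.

  void fill(char *dst) {
    char buf[64]; // a sizable local array typically triggers the stack protector
    for (int i = 0; i < 64; ++i)
      buf[i] = (char)i;
    __builtin_memcpy(dst, buf, sizeof(buf));
  }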
21240
21242 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21243 const DataLayout &DL) const {
21244 EVT VT = getValueType(DL, VTy);
21245 // Don't lower vlseg/vsseg for vector types that can't be split.
21246 if (!isTypeLegal(VT))
21247 return false;
21248
21250 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21251 Alignment))
21252 return false;
21253
21254 MVT ContainerVT = VT.getSimpleVT();
21255
21256 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21257 if (!Subtarget.useRVVForFixedLengthVectors())
21258 return false;
21259 // Sometimes the interleaved access pass picks up splats as interleaves of
21260 // one element. Don't lower these.
21261 if (FVTy->getNumElements() < 2)
21262 return false;
21263
21265 } else {
21266 // The intrinsics for scalable vectors are not overloaded on pointer type
21267 // and can only handle the default address space.
21268 if (AddrSpace)
21269 return false;
21270 }
21271
21272 // Need to make sure that EMUL * NFIELDS ≤ 8
21273 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21274 if (Fractional)
21275 return true;
21276 return Factor * LMUL <= 8;
21277}
21278
21280 Align Alignment) const {
21281 if (!Subtarget.hasVInstructions())
21282 return false;
21283
21284 // Only support fixed vectors if we know the minimum vector size.
21285 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21286 return false;
21287
21288 EVT ScalarType = DataType.getScalarType();
21289 if (!isLegalElementTypeForRVV(ScalarType))
21290 return false;
21291
21292 if (!Subtarget.enableUnalignedVectorMem() &&
21293 Alignment < ScalarType.getStoreSize())
21294 return false;
21295
21296 return true;
21297}
21298
21300 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21301 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21302 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21303 Intrinsic::riscv_seg8_load};
21304
21305/// Lower an interleaved load into a vlsegN intrinsic.
21306///
21307/// E.g. Lower an interleaved load (Factor = 2):
21308/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21309/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21310/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21311///
21312/// Into:
21313/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21314/// %ptr, i64 4)
21315/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21316/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21319 ArrayRef<unsigned> Indices, unsigned Factor) const {
21320 IRBuilder<> Builder(LI);
21321
21322 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21323 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21325 LI->getDataLayout()))
21326 return false;
21327
21328 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21329
21330 Function *VlsegNFunc =
21332 {VTy, LI->getPointerOperandType(), XLenTy});
21333
21334 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21335
21336 CallInst *VlsegN =
21337 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21338
21339 for (unsigned i = 0; i < Shuffles.size(); i++) {
21340 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21341 Shuffles[i]->replaceAllUsesWith(SubVec);
21342 }
21343
21344 return true;
21345}
21346
21348 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21349 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21350 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21351 Intrinsic::riscv_seg8_store};
21352
21353/// Lower an interleaved store into a vssegN intrinsic.
21354///
21355/// E.g. Lower an interleaved store (Factor = 3):
21356/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21357/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21358/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21359///
21360/// Into:
21361/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21362/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21363/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21364/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21365/// %ptr, i32 4)
21366///
21367/// Note that the new shufflevectors will be removed and we'll only generate one
21368/// vsseg3 instruction in CodeGen.
21370 ShuffleVectorInst *SVI,
21371 unsigned Factor) const {
21372 IRBuilder<> Builder(SI);
21373 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21374 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21375 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21376 ShuffleVTy->getNumElements() / Factor);
21377 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21378 SI->getPointerAddressSpace(),
21379 SI->getDataLayout()))
21380 return false;
21381
21382 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21383
21384 Function *VssegNFunc =
21385 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21386 {VTy, SI->getPointerOperandType(), XLenTy});
21387
21388 auto Mask = SVI->getShuffleMask();
21390
21391 for (unsigned i = 0; i < Factor; i++) {
21392 Value *Shuffle = Builder.CreateShuffleVector(
21393 SVI->getOperand(0), SVI->getOperand(1),
21394 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21395 Ops.push_back(Shuffle);
21396 }
21397 // This VL should be OK (should be executable in one vsseg instruction,
21398 // potentially under larger LMULs) because we checked that the fixed vector
21399 // type fits in isLegalInterleavedAccessType
21400 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21401 Ops.append({SI->getPointerOperand(), VL});
21402
21403 Builder.CreateCall(VssegNFunc, Ops);
21404
21405 return true;
21406}
21407
21409 IntrinsicInst *DI, LoadInst *LI,
21410 SmallVectorImpl<Instruction *> &DeadInsts) const {
21411 assert(LI->isSimple());
21412 IRBuilder<> Builder(LI);
21413
21414 // Only deinterleave2 supported at present.
21415 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21416 return false;
21417
21418 unsigned Factor = 2;
21419
21420 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21421 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21422
21423 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21425 LI->getDataLayout()))
21426 return false;
21427
21428 Function *VlsegNFunc;
21429 Value *VL;
21430 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21432
21433 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21434 VlsegNFunc = Intrinsic::getDeclaration(
21435 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21436 {ResVTy, LI->getPointerOperandType(), XLenTy});
21437 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21438 } else {
21439 static const Intrinsic::ID IntrIds[] = {
21440 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21441 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21442 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21443 Intrinsic::riscv_vlseg8};
21444
21445 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21446 {ResVTy, XLenTy});
21447 VL = Constant::getAllOnesValue(XLenTy);
21448 Ops.append(Factor, PoisonValue::get(ResVTy));
21449 }
21450
21451 Ops.append({LI->getPointerOperand(), VL});
21452
21453 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21454 DI->replaceAllUsesWith(Vlseg);
21455
21456 return true;
21457}
21458
21461 SmallVectorImpl<Instruction *> &DeadInsts) const {
21462 assert(SI->isSimple());
21463 IRBuilder<> Builder(SI);
21464
21465 // Only interleave2 supported at present.
21466 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21467 return false;
21468
21469 unsigned Factor = 2;
21470
21471 VectorType *VTy = cast<VectorType>(II->getType());
21472 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21473
21474 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21475 SI->getPointerAddressSpace(),
21476 SI->getDataLayout()))
21477 return false;
21478
21479 Function *VssegNFunc;
21480 Value *VL;
21481 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21482
21483 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21484 VssegNFunc = Intrinsic::getDeclaration(
21485 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21486 {InVTy, SI->getPointerOperandType(), XLenTy});
21487 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21488 } else {
21489 static const Intrinsic::ID IntrIds[] = {
21490 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21491 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21492 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21493 Intrinsic::riscv_vsseg8};
21494
21495 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21496 {InVTy, XLenTy});
21497 VL = Constant::getAllOnesValue(XLenTy);
21498 }
21499
21500 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21501 SI->getPointerOperand(), VL});
21502
21503 return true;
21504}
21505
21509 const TargetInstrInfo *TII) const {
21510 assert(MBBI->isCall() && MBBI->getCFIType() &&
21511 "Invalid call instruction for a KCFI check");
21512 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21513 MBBI->getOpcode()));
21514
21515 MachineOperand &Target = MBBI->getOperand(0);
21516 Target.setIsRenamable(false);
21517
21518 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21519 .addReg(Target.getReg())
21520 .addImm(MBBI->getCFIType())
21521 .getInstr();
21522}
21523
21524#define GET_REGISTER_MATCHER
21525#include "RISCVGenAsmMatcher.inc"
21526
21529 const MachineFunction &MF) const {
21531 if (Reg == RISCV::NoRegister)
21533 if (Reg == RISCV::NoRegister)
21535 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21536 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21537 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21538 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21539 StringRef(RegName) + "\"."));
21540 return Reg;
21541}
21542
21545 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21546
21547 if (NontemporalInfo == nullptr)
21549
21550 // 1 (the default value) works as __RISCV_NTLH_ALL
21551 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21552 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21553 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21554 // 5 -> __RISCV_NTLH_ALL
21555 int NontemporalLevel = 5;
21556 const MDNode *RISCVNontemporalInfo =
21557 I.getMetadata("riscv-nontemporal-domain");
21558 if (RISCVNontemporalInfo != nullptr)
21559 NontemporalLevel =
21560 cast<ConstantInt>(
21561 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21562 ->getValue())
21563 ->getZExtValue();
21564
21565 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21566 "RISC-V target doesn't support this non-temporal domain.");
21567
21568 NontemporalLevel -= 2;
21570 if (NontemporalLevel & 0b1)
21571 Flags |= MONontemporalBit0;
21572 if (NontemporalLevel & 0b10)
21573 Flags |= MONontemporalBit1;
21574
21575 return Flags;
21576}
21577
21580
21581 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21583 TargetFlags |= (NodeFlags & MONontemporalBit0);
21584 TargetFlags |= (NodeFlags & MONontemporalBit1);
21585 return TargetFlags;
21586}
21587
21589 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21590 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21591}
21592
21594 if (VT.isScalableVector())
21595 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21596 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21597 return true;
21598 return Subtarget.hasStdExtZbb() &&
21599 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21600}
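A small sketch (not from this file): with Zbb, a scalar population count lowers to a single cpop/cpopw, which is what isCtpopFast above is reporting as cheap for i32/i64.

  int popcount64(unsigned long long x) {
    return __builtin_popcountll(x); // cpop with Zbb; an expanded sequence otherwise
  }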
21601
21603 ISD::CondCode Cond) const {
21604 return isCtpopFast(VT) ? 0 : 1;
21605}
21606
21608
21609 // GISel support is in progress or complete for these opcodes.
21610 unsigned Op = Inst.getOpcode();
21611 if (Op == Instruction::Add || Op == Instruction::Sub ||
21612 Op == Instruction::And || Op == Instruction::Or ||
21613 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21614 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
21615 Op == Instruction::Freeze || Op == Instruction::Store)
21616 return false;
21617
21618 if (Inst.getType()->isScalableTy())
21619 return true;
21620
21621 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21622 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21623 !isa<ReturnInst>(&Inst))
21624 return true;
21625
21626 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21627 if (AI->getAllocatedType()->isScalableTy())
21628 return true;
21629 }
21630
21631 return false;
21632}
21633
21634SDValue
21635RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21636 SelectionDAG &DAG,
21637 SmallVectorImpl<SDNode *> &Created) const {
21639 if (isIntDivCheap(N->getValueType(0), Attr))
21640 return SDValue(N, 0); // Lower SDIV as SDIV
21641
21642 // Only perform this transform if short forward branch opt is supported.
21643 if (!Subtarget.hasShortForwardBranchOpt())
21644 return SDValue();
21645 EVT VT = N->getValueType(0);
21646 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21647 return SDValue();
21648
21649 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21650 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21651 return SDValue();
21652 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21653}
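A sketch of the pattern BuildSDIVPow2 above targets (assuming short-forward-branch support and a small power-of-two divisor): signed division needs a conditional adjustment of the dividend before the arithmetic shift, and that adjustment is what the CMov-based expansion makes branchless.

  long div_by_8(long x) {
    return x / 8; // srai plus a conditional add of 7 for negative x
  }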
21654
21655bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21656 EVT VT, const APInt &AndMask) const {
21657 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21658 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21660}
21661
21662unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21663 return Subtarget.getMinimumJumpTableEntries();
21664}
21665
21666// Handle single arg such as return value.
21667template <typename Arg>
21668void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21669 // This lambda determines whether the argument list consists of homogeneous
21670 // scalable vector types.
21671 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21672 // First, extract the first element in the argument type.
21673 auto It = ArgList.begin();
21674 MVT FirstArgRegType = It->VT;
21675
21676 // Return false if there is no return value or the type needs to be split.
21677 if (It == ArgList.end() || It->Flags.isSplit())
21678 return false;
21679
21680 ++It;
21681
21682 // Return false if this argument type contains only 1 element, or it's not a
21683 // scalable vector type.
21684 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21685 return false;
21686
21687 // Second, check if the following elements in this argument type are all the
21688 // same.
21689 for (; It != ArgList.end(); ++It)
21690 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21691 return false;
21692
21693 return true;
21694 };
21695
21696 if (isHomogeneousScalableVectorType(ArgList)) {
21697 // Handle as tuple type
21698 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21699 } else {
21700 // Handle as normal vector type
21701 bool FirstVMaskAssigned = false;
21702 for (const auto &OutArg : ArgList) {
21703 MVT RegisterVT = OutArg.VT;
21704
21705 // Skip non-RVV register type
21706 if (!RegisterVT.isVector())
21707 continue;
21708
21709 if (RegisterVT.isFixedLengthVector())
21710 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21711
21712 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21713 RVVArgInfos.push_back({1, RegisterVT, true});
21714 FirstVMaskAssigned = true;
21715 continue;
21716 }
21717
21718 RVVArgInfos.push_back({1, RegisterVT, false});
21719 }
21720 }
21721}
21722
21723// Handle multiple args.
21724template <>
21725void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21726 const DataLayout &DL = MF->getDataLayout();
21727 const Function &F = MF->getFunction();
21728 LLVMContext &Context = F.getContext();
21729
21730 bool FirstVMaskAssigned = false;
21731 for (Type *Ty : TypeList) {
21732 StructType *STy = dyn_cast<StructType>(Ty);
21733 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21734 Type *ElemTy = STy->getTypeAtIndex(0U);
21735 EVT VT = TLI->getValueType(DL, ElemTy);
21736 MVT RegisterVT =
21737 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21738 unsigned NumRegs =
21739 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21740
21741 RVVArgInfos.push_back(
21742 {NumRegs * STy->getNumElements(), RegisterVT, false});
21743 } else {
21744 SmallVector<EVT, 4> ValueVTs;
21745 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21746
21747 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21748 ++Value) {
21749 EVT VT = ValueVTs[Value];
21750 MVT RegisterVT =
21751 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21752 unsigned NumRegs =
21753 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21754
21755 // Skip non-RVV register type
21756 if (!RegisterVT.isVector())
21757 continue;
21758
21759 if (RegisterVT.isFixedLengthVector())
21760 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21761
21762 if (!FirstVMaskAssigned &&
21763 RegisterVT.getVectorElementType() == MVT::i1) {
21764 RVVArgInfos.push_back({1, RegisterVT, true});
21765 FirstVMaskAssigned = true;
21766 --NumRegs;
21767 }
21768
21769 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21770 }
21771 }
21772 }
21773}
21774
21775void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21776 unsigned StartReg) {
21777 assert((StartReg % LMul) == 0 &&
21778 "Start register number should be multiple of lmul");
21779 const MCPhysReg *VRArrays;
21780 switch (LMul) {
21781 default:
21782 report_fatal_error("Invalid lmul");
21783 case 1:
21784 VRArrays = ArgVRs;
21785 break;
21786 case 2:
21787 VRArrays = ArgVRM2s;
21788 break;
21789 case 4:
21790 VRArrays = ArgVRM4s;
21791 break;
21792 case 8:
21793 VRArrays = ArgVRM8s;
21794 break;
21795 }
21796
21797 for (unsigned i = 0; i < NF; ++i)
21798 if (StartReg)
21799 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21800 else
21801 AllocatedPhysRegs.push_back(MCPhysReg());
21802}
21803
21804/// This function determines if each RVV argument is passed by register; if the
21805/// argument can be assigned to a VR, it is given a specific register.
21806/// Otherwise, the argument is assigned 0, which is an invalid MCPhysReg.
21807void RVVArgDispatcher::compute() {
21808 uint32_t AssignedMap = 0;
21809 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21810 // Allocate first vector mask argument to V0.
21811 if (ArgInfo.FirstVMask) {
21812 AllocatedPhysRegs.push_back(RISCV::V0);
21813 return;
21814 }
21815
21816 unsigned RegsNeeded = divideCeil(
21817 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21818 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21819 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21820 StartReg += RegsNeeded) {
21821 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21822 if ((AssignedMap & Map) == 0) {
21823 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21824 AssignedMap |= Map;
21825 return;
21826 }
21827 }
21828
21829 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21830 };
21831
21832 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21833 allocate(RVVArgInfos[i]);
21834}
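A minimal sketch (assuming a toolchain that provides the RVV C intrinsics) of the kind of signature the dispatcher above assigns registers for: scalable vector arguments are placed in v8 upward, with the first mask argument, if any, going to v0.

  #include <riscv_vector.h>

  vint32m2_t vadd2(vint32m2_t a, vint32m2_t b, size_t vl) {
    return __riscv_vadd_vv_i32m2(a, b, vl); // a and b arrive in vector argument registers
  }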
21835
21837 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21838 return AllocatedPhysRegs[CurIdx++];
21839}
21840
21843 int JTI,
21844 SelectionDAG &DAG) const {
21845 if (Subtarget.hasStdExtZicfilp()) {
21846 // When Zicfilp is enabled, we need to use a software-guarded branch for the
21847 // jump table branch.
21848 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
21849 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
21850 Addr);
21851 }
21852 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
21853}
21854
21856
21857#define GET_RISCVVIntrinsicsTable_IMPL
21858#include "RISCVGenSearchableTables.inc"
21859
21860} // namespace llvm::RISCVVIntrinsicsTable
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1249
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1241
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1021
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1470
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1614
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1489
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1706
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:61
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:809
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:726
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:724
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:730
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:728
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
bool isFloatingPointOperation() const
Definition: Instructions.h:864
BinOp getOperation() const
Definition: Instructions.h:787
Value * getValOperand()
Definition: Instructions.h:856
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:829
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:367
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:842
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
iterator_range< arg_iterator > args()
Definition: Function.h:890
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
Argument * getArg(unsigned i) const
Definition: Function.h:884
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1896
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2536
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1851
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2053
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1766
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2514
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1871
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
Value * getPointerOperand()
Definition: Instructions.h:253
bool isSimple() const
Definition: Instructions.h:245
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:293
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:403
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a strided load/store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node is an UNDEF node.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:390
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:500
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:892
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
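A minimal sketch of how a few of the SelectionDAG helpers listed above (getConstant, getSetCC, getSelect) compose. It is illustrative only and not code from this file; the helper name buildIsZeroSelect is made up, and the setcc result type is hard-coded to MVT::i1 for brevity (a real target would query getSetCCResultType).
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Illustrative only: produce (select (setcc X, 0, eq), 1, 0) of type VT.
static SDValue buildIsZeroSelect(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue X, EVT VT) {
  SDValue Zero = DAG.getConstant(0, DL, X.getValueType());
  SDValue Cond = DAG.getSetCC(DL, MVT::i1, X, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, Cond, DAG.getConstant(1, DL, VT),
                       DAG.getConstant(0, DL, VT));
}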
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
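A hedged illustration of the static shuffle-mask predicates above; the mask values and the wrapper function are invented for the example, not taken from this file.
#include "llvm/IR/Instructions.h"
using namespace llvm;
static void maskPredicateExamples() {
  int Rev[] = {3, 2, 1, 0};
  int Id[] = {0, 1, 2, 3};
  // A 4-element reversal of a single source vector.
  bool IsRev = ShuffleVectorInst::isReverseMask(Rev, /*NumSrcElts=*/4);   // true
  // A 4-element identity mask drawn from one source vector.
  bool IsId = ShuffleVectorInst::isIdentityMask(Id, /*NumSrcElts=*/4);    // true
  (void)IsRev;
  (void)IsId;
}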
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
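A short sketch combining the SmallSet and SmallVector operations listed above (insert, count, push_back); the uniqueRegs helper and its use case are assumptions for illustration only.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
// Collect each register number at most once, preserving first-seen order.
static SmallVector<unsigned, 8> uniqueRegs(ArrayRef<unsigned> Regs) {
  SmallSet<unsigned, 8> Seen;
  SmallVector<unsigned, 8> Order;
  for (unsigned R : Regs)
    if (Seen.insert(R).second)   // insert() reports whether R was newly added
      Order.push_back(R);
  return Order;
}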
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
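A minimal sketch of the StringSwitch Case/Cases/Default pattern listed above; the strings and integer codes are made up for illustration.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;
// Map a textual mode name to a small integer code, -1 if unrecognized.
static int parseMode(StringRef S) {
  return StringSwitch<int>(S)
      .Case("rne", 0)
      .Cases("rtz", "rdn", 1)
      .Default(-1);
}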
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:423
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:600
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
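The configuration hooks above are normally invoked from a target's TargetLowering constructor. The following is a hypothetical sketch of that call pattern: the DemoTLI class name and the particular operations, types, and actions are illustrative assumptions, not a statement of what this file configures.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;
namespace {
// Skeletal subclass; never instantiated here, it only shows the call pattern.
struct DemoTLI : TargetLowering {
  explicit DemoTLI(const TargetMachine &TM) : TargetLowering(TM) {
    setBooleanContents(ZeroOrOneBooleanContent);         // i1 results are 0 or 1
    setOperationAction(ISD::BR_JT, MVT::Other, Expand);  // expand jump-table branches
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);     // no truncating f64->f32 stores
    setMinFunctionAlignment(Align(4));                   // functions aligned to 4 bytes
  }
};
} // namespace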
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:245
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:372
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:826
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1194
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1190
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1407
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1355
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1440
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1337
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1223
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1339
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1340
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:840
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1425
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1429
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1296
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1301
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1439
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:963
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1335
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1336
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1480
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:935
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1256
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1422
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1289
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1426
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1056
@ STRICT_LROUND
Definition: ISDOpcodes.h:445
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:980
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1145
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1338
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1124
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:600
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:660
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1441
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1219
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1434
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1333
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1279
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:906
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1397
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1316
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1341
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1028
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1109
@ STRICT_LRINT
Definition: ISDOpcodes.h:447
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:605
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:938
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1367
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1442
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:443
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:972
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1331
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1047
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1332
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1250
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:650
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:448
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:626
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1330
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:919
@ STRICT_LLROUND
Definition: ISDOpcodes.h:446
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:905
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1430
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1214
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1138
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:594
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1052
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:831
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:835
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
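A minimal sketch of how the two VP operand-position helpers above are typically used, assuming N is an SDNode* with a vector-predicated opcode (e.g. ISD::VP_ADD):
  if (ISD::isVPOpcode(N->getOpcode())) {
    // Fetch the mask and explicit vector length operands, if the opcode has them.
    if (std::optional<unsigned> MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
      SDValue Mask = N->getOperand(*MaskIdx);
    if (std::optional<unsigned> EVLIdx =
            ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
      SDValue EVL = N->getOperand(*EVLIdx);
  }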
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
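A sketch of the two condition-code helpers above in a typical canonicalisation; Cond, LHS and RHS are assumed to come from a SETCC node in the surrounding combine.
  // (Y op X) has the same meaning as (X swapped-op Y); !(X op Y) becomes the
  // inverse condition for the type being compared.
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(Cond);
  ISD::CondCode Inverted = ISD::getSetCCInverse(Cond, LHS.getValueType());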
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1565
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1565
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1552
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1486
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1648
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
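The matchers above compose as in this sketch, which recognises the usual insertelement-plus-shufflevector splat idiom in IR. V and SplatVal are assumptions; a real splat check would also verify that the shuffle mask is all zeros.
  #include "llvm/IR/PatternMatch.h"
  using namespace llvm::PatternMatch;
  Value *SplatVal = nullptr;
  // insertelement undef, %x, 0 followed by a shufflevector of that value.
  bool LooksLikeSplat = match(
      V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(SplatVal), m_ZeroInt()),
                   m_Undef()));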
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SW_GUARDED_BRIND
Software guarded BRIND node.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
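A hedged sketch of how the integer-materialisation helpers above are queried when costing an immediate; Imm and Subtarget are assumptions from the surrounding lowering code, and the Size argument is taken to be in bits.
  // Expand Imm into the LUI/ADDI/SLLI... sequence needed to materialise it,
  // then ask for a cost estimate of the same value as a 64-bit constant.
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
  unsigned NumInstrs = Seq.size();
  int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), /*Size=*/64, Subtarget,
                                        /*CompressionCost=*/false,
                                        /*FreeZeroes=*/false);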
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
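A small sketch of the SEW/LMUL encode/decode helpers above; the concrete values are illustrative only.
  // SEW=32 is encoded as log2(32)-3 = 2 in vtype; LMUL=1/2 is a fractional LMUL.
  unsigned VSEW = RISCVVType::encodeSEW(32);                        // 2
  unsigned SEW = RISCVVType::decodeVSEW(VSEW);                      // 32
  RISCVII::VLMUL Frac = RISCVVType::encodeLMUL(2, /*Fractional=*/true);
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(Frac);          // {2, true}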
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
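A sketch of the libcall-selection helpers above, with f64 to i32 as an illustrative type pair:
  // Ask whether a runtime routine exists for the conversion; UNKNOWN_LIBCALL
  // means the target must lower it some other way.
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
  if (LC != RTLIB::UNKNOWN_LIBCALL) {
    // The lowering would then emit the call (e.g. via TargetLowering::makeLibCall).
  }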
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2431
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:296
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1535
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
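Worked values for the bit-manipulation helpers above; the constants are illustrative only.
  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"
  void bitHelperExamples() {
    bool P2 = llvm::isPowerOf2_64(64);                 // true: 64 == 2^6
    unsigned FloorLog = llvm::Log2_64(64);             // 6
    uint64_t Ceil = llvm::PowerOf2Ceil(33);            // 64
    int Trailing = llvm::countr_zero<uint64_t>(0x40);  // 6
    int Width = llvm::bit_width<uint64_t>(37);         // 6: 37 is 0b100101
    (void)P2; (void)FloorLog; (void)Ceil; (void)Trailing; (void)Width;
  }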
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1935
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
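The range helpers above (enumerate, transform, any_of) in a small sketch over a shuffle-mask-style vector; the data is illustrative.
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  void rangeHelperExamples() {
    llvm::SmallVector<int> Mask = {0, 2, 4, -1};
    bool HasUndefLane = llvm::any_of(Mask, [](int M) { return M < 0; });
    llvm::SmallVector<int> Doubled(Mask.size());
    llvm::transform(Mask, Doubled.begin(),
                    [](int M) { return M < 0 ? M : 2 * M; });
    for (auto [Idx, M] : llvm::enumerate(Mask))
      (void)Idx, (void)M; // index/value pairs, as when scanning operand lists
    (void)HasUndefLane;
  }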
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
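A sketch of the splat-constant query above inside a DAG combine; N is an assumed SDNode* whose second operand may be a scalar constant or a constant splat.
  if (ConstantSDNode *C =
          isConstOrConstSplat(N->getOperand(1), /*AllowUndefs=*/true)) {
    const APInt &SplatVal = C->getAPIntValue();
    if (SplatVal.isPowerOf2()) {
      // e.g. rewrite a multiply by 2^k into a shift by k.
    }
  }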
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1928
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
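A sketch combining the shuffle-mask and range helpers above; the arguments are illustrative only.
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"
  void maskExample() {
    // createSequentialMask(2, 4, 2) -> {2, 3, 4, 5, -1, -1}; the -1s are undef lanes.
    llvm::SmallVector<int, 16> Mask = llvm::createSequentialMask(2, 4, 2);
    bool HasThree = llvm::is_contained(Mask, 3);                          // true
    auto UndefLanes = llvm::count_if(Mask, [](int M) { return M < 0; });  // 2
    (void)HasThree; (void)UndefLanes;
  }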
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:329
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:381
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:388
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:275
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:291
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:341
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:416
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isFixedLengthVector() const
Definition: ValueTypes.h:178
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:405
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:314
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:299
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
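A sketch exercising several of the EVT queries above; Ctx is an assumed LLVMContext& and the types are examples.
  EVT VecVT = EVT::getVectorVT(Ctx, MVT::f32, 4);                  // v4f32
  EVT IntVT = VecVT.changeVectorElementTypeToInteger();            // v4i32
  unsigned EltBits = IntVT.getScalarSizeInBits();                  // 32
  unsigned NumElts = IntVT.getVectorNumElements();                 // 4
  EVT HalfEltVT = EVT::getIntegerVT(Ctx, EltBits / 2);             // i16
  bool Narrower = HalfEltVT.bitsLT(IntVT.getVectorElementType());  // true
  bool Fixed = IntVT.isFixedLengthVector();                        // true (not scalable)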
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1042
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:263
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1002
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:269
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
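A sketch of the KnownBits interfaces above, modelling what is known about (X & 0xFF) << 4 for a 32-bit X; the pattern is illustrative, not taken from this file.
  #include "llvm/Support/KnownBits.h"
  void knownBitsExample() {
    KnownBits Masked(32);
    Masked.Zero = APInt::getBitsSetFrom(32, 8);   // the AND clears bits 8..31
    KnownBits ShAmt = KnownBits::makeConstant(APInt(32, 4));
    KnownBits Shifted = KnownBits::shl(Masked, ShAmt);
    unsigned MaxActive = Shifted.countMaxActiveBits(); // 12: the value fits in 12 bits
    KnownBits Low16 = Shifted.trunc(16);               // known bits of the low half
    (void)MaxActive; (void)Low16;
  }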
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
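A minimal sketch of the MachinePointerInfo helpers above; MF and the frame index FI are assumed to come from the surrounding lowering code.
  // Describe a fixed stack slot, then the same slot at a +4 byte offset, for use
  // as the memory operand of a load or store node.
  MachinePointerInfo Slot = MachinePointerInfo::getFixedStack(MF, FI);
  MachinePointerInfo SlotPlus4 = Slot.getWithOffset(4);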
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)