LLVM 23.0.0git
RISCVISelLowering.cpp
Go to the documentation of this file.
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
// Statistic counter (llvm/ADT/Statistic.h, included above): counts tail calls
// emitted during lowering; reported under DEBUG_TYPE "riscv-lower" with -stats.
58 STATISTIC(NumTailCalls, "Number of tail calls");
59
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
// Hidden command-line flag "riscv-lower-form-vw-w-with-splat" (DEBUG_TYPE is
// "riscv-lower"), default false: when enabled, permits forming widening VW_W
// operations (e.g. VWADD_W) whose wide operand is a splat constant.
66 static cl::opt<bool>
67     AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68                      cl::desc("Allow the formation of VW_W operations (e.g., "
69                               "VWADD_W) with splat constants"),
70                      cl::init(false));
71
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(3));
83
// Hidden command-line flag "reassoc-shl-addi-add", default true: allows
// swapping an add with an addi so the remaining add can later be folded
// into a shift-add style combine.
84 static cl::opt<bool>
85     ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86                       cl::desc("Swap add and addi in cases where the add may "
87                                "be combined with a shift"),
88                       cl::init(true));
89
// VP (vector-predicated) opcodes given dedicated handling for bf16 vectors —
// presumably when the Zvfbfa extension supplies native bf16 support; the use
// sites are not visible in this chunk, so confirm against where this array is
// consumed (cf. the SetZvfbfaActions lambda later in the constructor).
90 // TODO: Support more ops
91 static const unsigned ZvfbfaVPOps[] = {
92     ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
98
100 const RISCVSubtarget &STI)
101 : TargetLowering(TM, STI), Subtarget(STI) {
102
103 RISCVABI::ABI ABI = Subtarget.getTargetABI();
104 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
105
106 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
107 !Subtarget.hasStdExtF()) {
108 errs() << "Hard-float 'f' ABI can't be used for a target that "
109 "doesn't support the F instruction set extension (ignoring "
110 "target-abi)\n";
111 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
112 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
113 !Subtarget.hasStdExtD()) {
114 errs() << "Hard-float 'd' ABI can't be used for a target that "
115 "doesn't support the D instruction set extension (ignoring "
116 "target-abi)\n";
117 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
118 }
119
120 switch (ABI) {
121 default:
122 reportFatalUsageError("Don't know how to lower this ABI");
131 break;
132 }
133
134 MVT XLenVT = Subtarget.getXLenVT();
135
136 // Set up the register classes.
137 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
138
139 if (Subtarget.hasStdExtZfhmin())
140 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
141 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
142 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
143 if (Subtarget.hasStdExtF())
144 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
145 if (Subtarget.hasStdExtD())
146 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
147 if (Subtarget.hasStdExtZhinxmin())
148 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
149 if (Subtarget.hasStdExtZfinx())
150 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
151 if (Subtarget.hasStdExtZdinx()) {
152 if (Subtarget.is64Bit())
153 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
154 else
155 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
156 }
157
158 static const MVT::SimpleValueType BoolVecVTs[] = {
159 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
160 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
161 static const MVT::SimpleValueType IntVecVTs[] = {
162 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
163 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
164 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
165 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
166 MVT::nxv4i64, MVT::nxv8i64};
167 static const MVT::SimpleValueType F16VecVTs[] = {
168 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
169 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
170 static const MVT::SimpleValueType BF16VecVTs[] = {
171 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
172 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
173 static const MVT::SimpleValueType F32VecVTs[] = {
174 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
175 static const MVT::SimpleValueType F64VecVTs[] = {
176 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
177 static const MVT::SimpleValueType VecTupleVTs[] = {
178 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
179 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
180 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
181 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
182 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
183 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
184 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
185 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
186 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
187 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
188 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
189
190 if (Subtarget.hasVInstructions()) {
191 auto addRegClassForRVV = [this](MVT VT) {
192 // Disable the smallest fractional LMUL types if ELEN is less than
193 // RVVBitsPerBlock.
194 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
195 if (VT.getVectorMinNumElements() < MinElts)
196 return;
197
198 unsigned Size = VT.getSizeInBits().getKnownMinValue();
199 const TargetRegisterClass *RC;
201 RC = &RISCV::VRRegClass;
202 else if (Size == 2 * RISCV::RVVBitsPerBlock)
203 RC = &RISCV::VRM2RegClass;
204 else if (Size == 4 * RISCV::RVVBitsPerBlock)
205 RC = &RISCV::VRM4RegClass;
206 else if (Size == 8 * RISCV::RVVBitsPerBlock)
207 RC = &RISCV::VRM8RegClass;
208 else
209 llvm_unreachable("Unexpected size");
210
211 addRegisterClass(VT, RC);
212 };
213
214 for (MVT VT : BoolVecVTs)
215 addRegClassForRVV(VT);
216 for (MVT VT : IntVecVTs) {
217 if (VT.getVectorElementType() == MVT::i64 &&
218 !Subtarget.hasVInstructionsI64())
219 continue;
220 addRegClassForRVV(VT);
221 }
222
223 if (Subtarget.hasVInstructionsF16Minimal() ||
224 Subtarget.hasVendorXAndesVPackFPH())
225 for (MVT VT : F16VecVTs)
226 addRegClassForRVV(VT);
227
228 if (Subtarget.hasVInstructionsBF16Minimal() ||
229 Subtarget.hasVendorXAndesVBFHCvt())
230 for (MVT VT : BF16VecVTs)
231 addRegClassForRVV(VT);
232
233 if (Subtarget.hasVInstructionsF32())
234 for (MVT VT : F32VecVTs)
235 addRegClassForRVV(VT);
236
237 if (Subtarget.hasVInstructionsF64())
238 for (MVT VT : F64VecVTs)
239 addRegClassForRVV(VT);
240
241 if (Subtarget.useRVVForFixedLengthVectors()) {
242 auto addRegClassForFixedVectors = [this](MVT VT) {
243 MVT ContainerVT = getContainerForFixedLengthVector(VT);
244 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
245 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
246 addRegisterClass(VT, TRI.getRegClass(RCID));
247 };
249 if (useRVVForFixedLengthVectorVT(VT))
250 addRegClassForFixedVectors(VT);
251
253 if (useRVVForFixedLengthVectorVT(VT))
254 addRegClassForFixedVectors(VT);
255 }
256
257 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
258 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
259 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
260 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
261 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
262 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
263 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
264 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
265 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
266 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
267 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
268 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
269 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
270 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
271 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
272 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
273 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
274 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
275 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
276 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
277 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
278 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
279 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
280 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
281 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
282 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
283 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
284 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
285 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
286 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
287 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
288 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
289 }
290
291 // fixed vector is stored in GPRs for P extension packed operations
292 if (Subtarget.enablePExtSIMDCodeGen()) {
293 if (Subtarget.is64Bit()) {
294 addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
295 addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
296 addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
297 } else {
298 addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
299 addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
300 }
301 }
302
303 // Compute derived properties from the register classes.
305
307
309 MVT::i1, Promote);
310 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
312 MVT::i1, Promote);
313
314 // TODO: add all necessary setOperationAction calls.
316
321
326 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
329 }
330
332
335
336 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
337 !Subtarget.hasVendorXAndesPerf())
339
341
342 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
343 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
344 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
345 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
346
347 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
350 }
351
352 if (Subtarget.is64Bit()) {
354
357 MVT::i32, Custom);
360 }
361 if (!Subtarget.hasStdExtZmmul()) {
363 } else if (Subtarget.is64Bit()) {
366 } else {
368 }
369
370 if (!Subtarget.hasStdExtM()) {
372 Expand);
373 } else if (Subtarget.is64Bit()) {
375 {MVT::i8, MVT::i16, MVT::i32}, Custom);
376 }
377
380 Expand);
381
383 Custom);
384
385 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
386 if (Subtarget.is64Bit())
388 } else if (Subtarget.hasVendorXTHeadBb()) {
389 if (Subtarget.is64Bit())
392 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
394 } else {
396 }
397
398 if (Subtarget.hasStdExtP())
400
402 Subtarget.hasREV8Like() ? Legal : Expand);
403
404 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
405 !Subtarget.is64Bit()) {
407 } else {
408 // Zbkb can use rev8+brev8 to implement bitreverse.
410 Subtarget.hasStdExtZbkb() ? Custom : Expand);
411 if (Subtarget.hasStdExtZbkb())
413 }
414
415 if (Subtarget.hasStdExtZbb() ||
416 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
418 Legal);
419 }
420
421 if (Subtarget.hasCTZLike()) {
422 if (Subtarget.is64Bit())
424 } else {
426 }
427
428 if (!Subtarget.hasCPOPLike()) {
429 // TODO: These should be set to LibCall, but this currently breaks
430 // the Linux kernel build. See #101786. Lacks i128 tests, too.
431 if (Subtarget.is64Bit())
433 else
436 }
437
438 if (Subtarget.hasCLZLike()) {
439 // We need the custom lowering to make sure that the resulting sequence
440 // for the 32bit case is efficient on 64bit targets.
441 // Use default promotion for i32 without Zbb.
442 if (Subtarget.is64Bit() &&
443 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP()))
445 } else {
447 }
448
449 if (Subtarget.hasStdExtP()) {
451 if (Subtarget.is64Bit())
453 }
454
455 if (Subtarget.hasStdExtP() ||
456 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
458 if (Subtarget.is64Bit())
460 } else if (Subtarget.hasShortForwardBranchIALU()) {
461 // We can use PseudoCCSUB to implement ABS.
463 } else if (Subtarget.is64Bit()) {
465 }
466
467 if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
469
470 if ((Subtarget.hasStdExtP() || Subtarget.hasVendorXqcia()) &&
471 !Subtarget.is64Bit()) {
472 // FIXME: Support i32 on RV64+P by inserting into a v2i32 vector, doing
473 // the vector operation and extracting.
475 MVT::i32, Legal);
476 } else if (!Subtarget.hasStdExtZbb() && Subtarget.is64Bit()) {
478 MVT::i32, Custom);
479 }
480
481 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
483 }
484
485 if ((Subtarget.hasStdExtP() || Subtarget.hasVendorXqcia()) &&
486 !Subtarget.is64Bit()) {
487 // FIXME: Support i32 on RV64+P by inserting into a v2i32 vector, doing
488 // pssha.w and extracting.
490 }
491
492 static const unsigned FPLegalNodeTypes[] = {
500
501 static const ISD::CondCode FPCCToExpand[] = {
505
506 static const unsigned FPOpToExpand[] = {ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
507 ISD::FPOW};
508 static const unsigned FPOpToLibCall[] = {ISD::FREM};
509
510 static const unsigned FPRndMode[] = {
513
514 static const unsigned ZfhminZfbfminPromoteOps[] = {
524
525 if (Subtarget.enablePExtSIMDCodeGen()) {
527 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
528 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
529 static const MVT RV32VTs[] = {MVT::v2i16, MVT::v4i8};
530 static const MVT RV64VTs[] = {MVT::v2i32, MVT::v4i16, MVT::v8i8};
531 ArrayRef<MVT> VTs;
532 if (Subtarget.is64Bit()) {
533 VTs = RV64VTs;
534 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
535 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
536 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
537 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
538 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
539 } else {
540 VTs = RV32VTs;
542 }
556 VTs, Expand);
558 Legal);
564 VTs, Expand);
565 // P extension vector comparisons produce all 1s for true, all 0s for false
567 }
568
569 if (Subtarget.hasStdExtZfbfmin()) {
575 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
582 }
583
584 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
585 if (Subtarget.hasStdExtZfhOrZhinx()) {
586 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
587 setOperationAction(FPRndMode, MVT::f16,
588 Subtarget.hasStdExtZfa() ? Legal : Custom);
591 Subtarget.hasStdExtZfa() ? Legal : Custom);
592 if (Subtarget.hasStdExtZfa())
594 } else {
595 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
600 setOperationAction(Op, MVT::f16, Custom);
606 }
607
608 if (!Subtarget.hasStdExtD()) {
609 // FIXME: handle f16 fma when f64 is not legal. Using an f32 fma
610 // instruction runs into double rounding issues, so this is wrong.
611 // Normally we'd use an f64 fma, but without the D extension the f64 type
612 // is not legal. This should probably be a libcall.
613 AddPromotedToType(ISD::FMA, MVT::f16, MVT::f32);
614 AddPromotedToType(ISD::STRICT_FMA, MVT::f16, MVT::f32);
615 }
616
618
621 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
625
627 ISD::FNEARBYINT, MVT::f16,
628 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
633 MVT::f16, Promote);
634
635 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
636 // complete support for all operations in LegalizeDAG.
641 MVT::f16, Promote);
642
643 // We need to custom promote this.
644 if (Subtarget.is64Bit())
646 }
647
648 if (Subtarget.hasStdExtFOrZfinx()) {
649 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
650 setOperationAction(FPRndMode, MVT::f32,
651 Subtarget.hasStdExtZfa() ? Legal : Custom);
652 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
656 setOperationAction(FPOpToExpand, MVT::f32, Expand);
657 setOperationAction(FPOpToLibCall, MVT::f32, LibCall);
658 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
659 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
660 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
661 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
665 Subtarget.isSoftFPABI() ? LibCall : Custom);
670
671 if (Subtarget.hasStdExtZfa()) {
675 } else {
677 }
678 }
679
680 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
682
683 if (Subtarget.hasStdExtDOrZdinx()) {
684 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
685
686 if (!Subtarget.is64Bit())
688
689 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
690 !Subtarget.is64Bit()) {
693 }
694
695 if (Subtarget.hasStdExtZfa()) {
697 setOperationAction(FPRndMode, MVT::f64, Legal);
700 } else {
701 if (Subtarget.is64Bit())
702 setOperationAction(FPRndMode, MVT::f64, Custom);
703
705 }
706
709 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
713 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
714 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
715 setOperationAction(FPOpToExpand, MVT::f64, Expand);
716 setOperationAction(FPOpToLibCall, MVT::f64, LibCall);
717 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
718 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
719 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
720 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
724 Subtarget.isSoftFPABI() ? LibCall : Custom);
729 }
730
731 if (Subtarget.is64Bit()) {
734 MVT::i32, Custom);
736 }
737
738 if (Subtarget.hasStdExtFOrZfinx()) {
740 Custom);
741
742 // f16/bf16 require custom handling.
744 Custom);
746 Custom);
747
756 }
757
760 XLenVT, Custom);
761
763
764 if (Subtarget.is64Bit())
766
767 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
768 // Unfortunately this can't be determined just from the ISA naming string.
770 Subtarget.is64Bit() ? Legal : Custom);
772 Subtarget.is64Bit() ? Legal : Custom);
773
774 if (Subtarget.is64Bit()) {
777 }
778
781 if (Subtarget.is64Bit())
783
784 if (Subtarget.hasVendorXMIPSCBOP())
786 else if (Subtarget.hasStdExtZicbop())
788
789 if (Subtarget.hasStdExtZalrsc()) {
790 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
791 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
793 else
795 } else if (Subtarget.hasForcedAtomics()) {
796 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
797 } else {
799 }
800
802
804
805 if (getTargetMachine().getTargetTriple().isOSLinux()) {
806 // Custom lowering of llvm.clear_cache.
808 }
809
810 if (Subtarget.hasVInstructions()) {
812
814
815 // RVV intrinsics may have illegal operands.
816 // We also need to custom legalize vmv.x.s.
819 {MVT::i8, MVT::i16}, Custom);
820 if (Subtarget.is64Bit())
822 MVT::i32, Custom);
823 else
825 MVT::i64, Custom);
826
828 MVT::Other, Custom);
829
830 static const unsigned IntegerVPOps[] = {
831 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
832 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
833 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
834 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
835 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
836 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
837 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
838 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
839 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
840 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
841 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
842 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
843 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
844 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
845
846 static const unsigned FloatingPointVPOps[] = {
847 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
848 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
849 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
850 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
851 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
852 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
853 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
854 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
855 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
856 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
857 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
858 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
859 ISD::VP_REDUCE_FMAXIMUM};
860
861 static const unsigned IntegerVecReduceOps[] = {
865
866 static const unsigned FloatingPointVecReduceOps[] = {
869
870 static const unsigned FloatingPointLibCallOps[] = {
873
874 if (!Subtarget.is64Bit()) {
875 // We must custom-lower certain vXi64 operations on RV32 due to the vector
876 // element type being illegal.
878 MVT::i64, Custom);
879
880 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
881
882 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
883 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
884 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
885 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
886 MVT::i64, Custom);
887 }
888
889 for (MVT VT : BoolVecVTs) {
890 if (!isTypeLegal(VT))
891 continue;
892
894
895 // Mask VTs are custom-expanded into a series of standard nodes
899 VT, Custom);
900
902 Custom);
903
905 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
906 Expand);
907 setOperationAction(ISD::VP_MERGE, VT, Custom);
908
909 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
910 Custom);
911
912 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
913
916 Custom);
917
919 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
920 Custom);
921
922 // RVV has native int->float & float->int conversions where the
923 // element type sizes are within one power-of-two of each other. Any
924 // wider distances between type sizes have to be lowered as sequences
925 // which progressively narrow the gap in stages.
930 VT, Custom);
932 Custom);
933
934 // Expand all extending loads to types larger than this, and truncating
935 // stores from types larger than this.
937 setTruncStoreAction(VT, OtherVT, Expand);
939 OtherVT, Expand);
940 }
941
942 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
943 ISD::VP_TRUNCATE, ISD::VP_SETCC},
944 VT, Custom);
945
948
950
951 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
952 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
953
956 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
957 }
958
959 for (MVT VT : IntVecVTs) {
960 if (!isTypeLegal(VT))
961 continue;
962
965
966 // Vectors implement MULHS/MULHU.
968
969 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
970 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
972
974 Legal);
975
977
978 // Custom-lower extensions and truncations from/to mask types.
980 VT, Custom);
981
982 // RVV has native int->float & float->int conversions where the
983 // element type sizes are within one power-of-two of each other. Any
984 // wider distances between type sizes have to be lowered as sequences
985 // which progressively narrow the gap in stages.
990 VT, Custom);
992 Custom);
996 VT, Legal);
997
998 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
999 // nodes which truncate by one power of two at a time.
1002 Custom);
1003
1004 // Custom-lower insert/extract operations to simplify patterns.
1006 Custom);
1007
1008 // Custom-lower reduction operations to set up the corresponding custom
1009 // nodes' operands.
1010 setOperationAction(IntegerVecReduceOps, VT, Custom);
1011
1012 setOperationAction(IntegerVPOps, VT, Custom);
1013
1015
1017 VT, Custom);
1018
1020 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1021 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1022 VT, Custom);
1023 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1024
1027 VT, Custom);
1028
1031
1033
1035 setTruncStoreAction(VT, OtherVT, Expand);
1037 OtherVT, Expand);
1038 }
1039
1042
1044 VT, Custom);
1045
1046 if (Subtarget.hasStdExtZvkb()) {
1048 setOperationAction(ISD::VP_BSWAP, VT, Custom);
1049 } else {
1050 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
1052 }
1053
1054 if (Subtarget.hasStdExtZvbb()) {
1056 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
1057 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
1058 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
1059 VT, Custom);
1060 } else {
1061 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
1063 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
1064 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
1065 VT, Expand);
1066
1067 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1068 // range of f32.
1069 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1070 if (isTypeLegal(FloatVT)) {
1072 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
1073 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
1074 VT, Custom);
1075 }
1076 }
1077
1079 }
1080
1081 for (MVT VT : VecTupleVTs) {
1082 if (!isTypeLegal(VT))
1083 continue;
1084
1086 }
1087
1088 // Expand various CCs to best match the RVV ISA, which natively supports UNE
1089 // but no other unordered comparisons, and supports all ordered comparisons
1090 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
1091 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
1092 // and we pattern-match those back to the "original", swapping operands once
1093 // more. This way we catch both operations and both "vf" and "fv" forms with
1094 // fewer patterns.
1095 static const ISD::CondCode VFPCCToExpand[] = {
1099 };
1100
1101 // TODO: support more ops.
1102 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1107 ISD::FADD,
1108 ISD::FSUB,
1109 ISD::FMUL,
1110 ISD::FMA,
1111 ISD::FDIV,
1112 ISD::FSQRT,
1113 ISD::FCEIL,
1118 ISD::FRINT,
1121 ISD::SETCC,
1134
1135 // TODO: Make more of these ops legal.
1136 static const unsigned ZvfbfaPromoteOps[] = {ISD::FDIV,
1137 ISD::FSQRT,
1138 ISD::FCEIL,
1143 ISD::FRINT,
1145 ISD::SETCC,
1156
1157 // TODO: support more vp ops.
1158 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1159 ISD::VP_FADD,
1160 ISD::VP_FSUB,
1161 ISD::VP_FMUL,
1162 ISD::VP_FDIV,
1163 ISD::VP_FMA,
1164 ISD::VP_REDUCE_FMIN,
1165 ISD::VP_REDUCE_FMAX,
1166 ISD::VP_SQRT,
1167 ISD::VP_FMINNUM,
1168 ISD::VP_FMAXNUM,
1169 ISD::VP_FCEIL,
1170 ISD::VP_FFLOOR,
1171 ISD::VP_FROUND,
1172 ISD::VP_FROUNDEVEN,
1173 ISD::VP_FROUNDTOZERO,
1174 ISD::VP_FRINT,
1175 ISD::VP_FNEARBYINT,
1176 ISD::VP_SETCC,
1177 ISD::VP_FMINIMUM,
1178 ISD::VP_FMAXIMUM,
1179 ISD::VP_REDUCE_FMINIMUM,
1180 ISD::VP_REDUCE_FMAXIMUM};
1181
1182 // Sets common operation actions on RVV floating-point vector types.
1183 const auto SetCommonVFPActions = [&](MVT VT) {
1185 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1186 // sizes are within one power-of-two of each other. Therefore conversions
1187 // between vXf16 and vXf64 must be lowered as sequences which convert via
1188 // vXf32.
1192 // Custom-lower insert/extract operations to simplify patterns.
1194 Custom);
1195 // Expand various condition codes (explained above).
1196 setCondCodeAction(VFPCCToExpand, VT, Expand);
1197
1200 Legal);
1202
1206 VT, Custom);
1207
1208 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1209
1210 // Expand FP operations that need libcalls.
1211 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1212
1214
1216
1218 VT, Custom);
1219
1221 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1222 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1223 VT, Custom);
1224 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1225
1228
1231 VT, Custom);
1232
1235
1238 VT, Custom);
1239 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1240 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1241
1242 setOperationAction(FloatingPointVPOps, VT, Custom);
1243
1245 Custom);
1248 VT, Legal);
1253 VT, Custom);
1254
1256 };
1257
1258 // Sets common extload/truncstore actions on RVV floating-point vector
1259 // types.
1260 const auto SetCommonVFPExtLoadTruncStoreActions =
1261 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1262 for (auto SmallVT : SmallerVTs) {
1263 setTruncStoreAction(VT, SmallVT, Expand);
1264 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1265 }
1266 };
1267
1268 // Sets common actions for f16 and bf16 for when there's only
1269 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1270 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1273 Custom);
1274 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1277 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1278 Custom);
1280 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1286 VT, Custom);
1287 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1288 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1289 MVT EltVT = VT.getVectorElementType();
1290 if (isTypeLegal(EltVT))
1292 VT, Custom);
1293 else
1296 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1297 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1298 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1299 ISD::VP_SCATTER},
1300 VT, Custom);
1301 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1302
1306
1307 // Expand FP operations that need libcalls.
1308 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1309
1310 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1311 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1312 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1313 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1314 } else {
1315 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1316 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1317 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1318 }
1319 };
1320
1321 // Sets common actions for zvfbfa, some of instructions are supported
1322 // natively so that we don't need to promote them.
1323 const auto SetZvfbfaActions = [&](MVT VT) {
1326 Custom);
1327 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1330 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1331 Custom);
1333 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1340 VT, Custom);
1343 Legal);
1346 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1347 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1348
1352
1354 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1355 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1356 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1357 ISD::VP_SCATTER},
1358 VT, Custom);
1359 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1360
1361 // Expand FP operations that need libcalls.
1362 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1363
1364 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1365 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1366 setOperationAction(ZvfbfaPromoteOps, VT, Custom);
1367 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1368 } else {
1369 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1370 setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
1371 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1372 }
1373 };
1374
1375 if (Subtarget.hasVInstructionsF16()) {
1376 for (MVT VT : F16VecVTs) {
1377 if (!isTypeLegal(VT))
1378 continue;
1379 SetCommonVFPActions(VT);
1380 }
1381 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1382 for (MVT VT : F16VecVTs) {
1383 if (!isTypeLegal(VT))
1384 continue;
1385 SetCommonPromoteToF32Actions(VT);
1386 }
1387 }
1388
1389 if (Subtarget.hasVInstructionsBF16()) {
1390 for (MVT VT : BF16VecVTs) {
1391 if (!isTypeLegal(VT))
1392 continue;
1393 SetZvfbfaActions(VT);
1394 }
1395 } else if (Subtarget.hasVInstructionsBF16Minimal()) {
1396 for (MVT VT : BF16VecVTs) {
1397 if (!isTypeLegal(VT))
1398 continue;
1399 SetCommonPromoteToF32Actions(VT);
1400 }
1401 }
1402
1403 if (Subtarget.hasVInstructionsF32()) {
1404 for (MVT VT : F32VecVTs) {
1405 if (!isTypeLegal(VT))
1406 continue;
1407 SetCommonVFPActions(VT);
1408 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1409 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1410 }
1411 }
1412
1413 if (Subtarget.hasVInstructionsF64()) {
1414 for (MVT VT : F64VecVTs) {
1415 if (!isTypeLegal(VT))
1416 continue;
1417 SetCommonVFPActions(VT);
1418 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1419 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1420 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1421 }
1422 }
1423
1424 if (Subtarget.useRVVForFixedLengthVectors()) {
1426 if (!useRVVForFixedLengthVectorVT(VT))
1427 continue;
1428
1429 // By default everything must be expanded.
1430 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1433 setTruncStoreAction(VT, OtherVT, Expand);
1435 OtherVT, Expand);
1436 }
1437
1438 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1439 // expansion to a build_vector of 0s.
1441
1442 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1444 Custom);
1445
1448 Custom);
1449
1451 VT, Custom);
1452
1454 VT, Custom);
1455
1457
1459
1461
1463
1466 Custom);
1467
1469
1472 Custom);
1473
1475 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1476 Custom);
1477
1479 {
1488 },
1489 VT, Custom);
1491 Custom);
1492
1494
 1495     // Operations below are different between mask vectors and other
          // vectors.
1496 if (VT.getVectorElementType() == MVT::i1) {
1497 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1498 ISD::OR, ISD::XOR},
1499 VT, Custom);
1500
1501 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1502 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1503 VT, Custom);
1504
1505 setOperationAction(ISD::VP_MERGE, VT, Custom);
1506
1507 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1508 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1509 continue;
1510 }
1511
1512 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1513 // it before type legalization for i64 vectors on RV32. It will then be
1514 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1515 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1516 // improvements first.
1517 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1520
1521 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1523 }
1524
1527
1528 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1529 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1530 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1531 ISD::VP_SCATTER},
1532 VT, Custom);
1533 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1534
1538 VT, Custom);
1539
1542
1544
1545 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1546 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1548
1552 VT, Custom);
1553
1555
1558
1559 // Custom-lower reduction operations to set up the corresponding custom
1560 // nodes' operands.
1564 VT, Custom);
1565
1566 setOperationAction(IntegerVPOps, VT, Custom);
1567
1568 if (Subtarget.hasStdExtZvkb())
1570
1571 if (Subtarget.hasStdExtZvbb()) {
1574 VT, Custom);
1575 } else {
1576 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1577 // range of f32.
1578 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1579 if (isTypeLegal(FloatVT))
1582 Custom);
1583 }
1584
1586 }
1587
1589 // There are no extending loads or truncating stores.
1590 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1591 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1592 setTruncStoreAction(VT, InnerVT, Expand);
1593 }
1594
1595 if (!useRVVForFixedLengthVectorVT(VT))
1596 continue;
1597
1598 // By default everything must be expanded.
1599 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1601
1602 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1603 // expansion to a build_vector of 0s.
1605
1610 VT, Custom);
1611 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1612 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1613
1615 VT, Custom);
1616
1619 VT, Custom);
1620 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1621 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1622 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1623 VT, Custom);
1624 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1625
1628 Custom);
1629
1630 if (VT.getVectorElementType() == MVT::f16 &&
1631 !Subtarget.hasVInstructionsF16()) {
1633 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1635 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1636 Custom);
1637 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1638 Custom);
1641 if (Subtarget.hasStdExtZfhmin()) {
1643 } else {
1644 // We need to custom legalize f16 build vectors if Zfhmin isn't
1645 // available.
1647 }
1651 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1652 // Don't promote f16 vector operations to f32 if f32 vector type is
1653 // not legal.
1654 // TODO: could split the f16 vector into two vectors and do promotion.
1655 if (!isTypeLegal(F32VecVT))
1656 continue;
1657 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1658 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1659 continue;
1660 }
1661
1662 if (VT.getVectorElementType() == MVT::bf16) {
1664 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1667 if (Subtarget.hasStdExtZfbfmin()) {
1669 } else {
1670 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1671 // available.
1673 }
1674 if (Subtarget.hasStdExtZvfbfa()) {
1677 }
1679 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1680 Custom);
1681 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
 1682       // Don't promote bf16 vector operations to f32 if f32 vector type is
 1683       // not legal.
 1684       // TODO: could split the bf16 vector into two vectors and do promotion.
1685 if (!isTypeLegal(F32VecVT))
1686 continue;
1687
1688 if (Subtarget.hasStdExtZvfbfa())
1689 setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
1690 else
1691 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1692 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1693 continue;
1694 }
1695
1697 Custom);
1698
1704 VT, Custom);
1705
1710 VT, Custom);
1711
1712 setCondCodeAction(VFPCCToExpand, VT, Expand);
1713
1716
1718
1719 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1720
1721 setOperationAction(FloatingPointVPOps, VT, Custom);
1722
1729 VT, Custom);
1730 }
1731
1732 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1733 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1734 if (Subtarget.is64Bit())
1736 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1738 if (Subtarget.hasStdExtZfbfmin())
1740 if (Subtarget.hasStdExtFOrZfinx())
1742 if (Subtarget.hasStdExtDOrZdinx())
1744 }
1745 }
1746
1747 if (Subtarget.hasStdExtZaamo())
1749
1750 if (Subtarget.hasForcedAtomics()) {
1751 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1757 XLenVT, LibCall);
1758 }
1759
1760 if (Subtarget.hasVendorXTHeadMemIdx()) {
1761 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1762 setIndexedLoadAction(im, MVT::i8, Legal);
1763 setIndexedStoreAction(im, MVT::i8, Legal);
1764 setIndexedLoadAction(im, MVT::i16, Legal);
1765 setIndexedStoreAction(im, MVT::i16, Legal);
1766 setIndexedLoadAction(im, MVT::i32, Legal);
1767 setIndexedStoreAction(im, MVT::i32, Legal);
1768
1769 if (Subtarget.is64Bit()) {
1770 setIndexedLoadAction(im, MVT::i64, Legal);
1771 setIndexedStoreAction(im, MVT::i64, Legal);
1772 }
1773 }
1774 }
1775
1776 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1780
1784 }
1785
1786 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1787 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1788 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1791 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1792 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1793 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1794 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1795 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1796
1797 if (Subtarget.useRVVForFixedLengthVectors()) {
1799 if (VT.getVectorElementType() != MVT::i32 ||
1800 !useRVVForFixedLengthVectorVT(VT))
1801 continue;
1802 ElementCount EC = VT.getVectorElementCount();
1803 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1804 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1805 }
1806 }
1807 }
1808
1809 // Customize load and store operation for bf16 if zfh isn't enabled.
1810 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1811 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1813 }
1814
1815 // Function alignments.
1816 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1817 setMinFunctionAlignment(FunctionAlignment);
1818 // Set preferred alignments.
1819 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1820 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1821
1827
1828 if (Subtarget.hasStdExtFOrZfinx())
1830
1831 if (Subtarget.hasStdExtZbb())
1833
1834 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1835 Subtarget.hasVInstructions())
1837
1838 if (Subtarget.hasStdExtZbkb())
1840
1841 if (Subtarget.hasStdExtFOrZfinx())
1844 if (Subtarget.hasVInstructions())
1847 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1850 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1855
1856 if (Subtarget.hasVendorXTHeadMemPair())
1858 if (Subtarget.useRVVForFixedLengthVectors())
1860
1861 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
1862
1863 setMaxLargeFPConvertBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
1864
1865 // Disable strict node mutation.
1866 IsStrictFPEnabled = true;
1867 EnableExtLdPromotion = true;
1868
1869 // Let the subtarget decide if a predictable select is more expensive than the
1870 // corresponding branch. This information is used in CGP/SelectOpt to decide
1871 // when to convert selects into branches.
1872 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1873
1874 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1875 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1876
1877 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1878 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1879 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1880
1882 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1883 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1884
1885 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1886 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1887}
1888
1891 if (Subtarget.is64Bit() && Subtarget.enablePExtSIMDCodeGen())
1892 if (VT == MVT::v2i16 || VT == MVT::v4i8)
1893 return TypeWidenVector;
1894
1896}
1897
1899 LLVMContext &Context,
1900 EVT VT) const {
1901 if (!VT.isVector())
1902 return getPointerTy(DL);
1903 if (Subtarget.hasVInstructions() &&
1904 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1905 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1907}
1908
1910 return Subtarget.getXLenVT();
1911}
1912
1913// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1914bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1915 unsigned VF,
1916 bool IsScalable) const {
1917 if (!Subtarget.hasVInstructions())
1918 return true;
1919
1920 if (!IsScalable)
1921 return true;
1922
1923 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1924 return true;
1925
1926 // Don't allow VF=1 if those types are't legal.
1927 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1928 return true;
1929
1930 // VLEN=32 support is incomplete.
1931 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1932 return true;
1933
1934 // The maximum VF is for the smallest element width with LMUL=8.
1935 // VF must be a power of 2.
1936 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1937 return VF > MaxVF || !isPowerOf2_32(VF);
1938}
1939
1941 return !Subtarget.hasVInstructions() ||
1942 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1943}
1944
1946 const CallBase &I,
1947 MachineFunction &MF,
1948 unsigned Intrinsic) const {
1949 auto &DL = I.getDataLayout();
1950
1951 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1952 bool IsUnitStrided, bool UsePtrVal = false) {
1953 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1954 // We can't use ptrVal if the intrinsic can access memory before the
1955 // pointer. This means we can't use it for strided or indexed intrinsics.
1956 if (UsePtrVal)
1957 Info.ptrVal = I.getArgOperand(PtrOp);
1958 else
1959 Info.fallbackAddressSpace =
1960 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1961 Type *MemTy;
1962 if (IsStore) {
1963 // Store value is the first operand.
1964 MemTy = I.getArgOperand(0)->getType();
1965 } else {
1966 // Use return type. If it's segment load, return type is a struct.
1967 MemTy = I.getType();
1968 if (MemTy->isStructTy())
1969 MemTy = MemTy->getStructElementType(0);
1970 }
1971 if (!IsUnitStrided)
1972 MemTy = MemTy->getScalarType();
1973
1974 Info.memVT = getValueType(DL, MemTy);
1975 if (MemTy->isTargetExtTy()) {
1976 // RISC-V vector tuple type's alignment type should be its element type.
1977 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1978 MemTy = Type::getIntNTy(
1979 MemTy->getContext(),
1980 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1981 ->getZExtValue());
1982 Info.align = DL.getABITypeAlign(MemTy);
1983 } else {
1984 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1985 }
1986 Info.size = MemoryLocation::UnknownSize;
1987 Info.flags |=
1989 return true;
1990 };
1991
1992 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1994
1996 switch (Intrinsic) {
1997 default:
1998 return false;
1999 case Intrinsic::riscv_masked_atomicrmw_xchg:
2000 case Intrinsic::riscv_masked_atomicrmw_add:
2001 case Intrinsic::riscv_masked_atomicrmw_sub:
2002 case Intrinsic::riscv_masked_atomicrmw_nand:
2003 case Intrinsic::riscv_masked_atomicrmw_max:
2004 case Intrinsic::riscv_masked_atomicrmw_min:
2005 case Intrinsic::riscv_masked_atomicrmw_umax:
2006 case Intrinsic::riscv_masked_atomicrmw_umin:
2007 case Intrinsic::riscv_masked_cmpxchg:
2008 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
2009 // narrow atomic operation. These will be expanded to an LR/SC loop that
2010 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
2011 // will be used to modify the appropriate part of the 4 byte data and
2012 // preserve the rest.
2013 Info.opc = ISD::INTRINSIC_W_CHAIN;
2014 Info.memVT = MVT::i32;
2015 Info.ptrVal = I.getArgOperand(0);
2016 Info.offset = 0;
2017 Info.align = Align(4);
2020 return true;
2021 case Intrinsic::riscv_seg2_load_mask:
2022 case Intrinsic::riscv_seg3_load_mask:
2023 case Intrinsic::riscv_seg4_load_mask:
2024 case Intrinsic::riscv_seg5_load_mask:
2025 case Intrinsic::riscv_seg6_load_mask:
2026 case Intrinsic::riscv_seg7_load_mask:
2027 case Intrinsic::riscv_seg8_load_mask:
2028 case Intrinsic::riscv_sseg2_load_mask:
2029 case Intrinsic::riscv_sseg3_load_mask:
2030 case Intrinsic::riscv_sseg4_load_mask:
2031 case Intrinsic::riscv_sseg5_load_mask:
2032 case Intrinsic::riscv_sseg6_load_mask:
2033 case Intrinsic::riscv_sseg7_load_mask:
2034 case Intrinsic::riscv_sseg8_load_mask:
2035 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
2036 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2037 case Intrinsic::riscv_seg2_store_mask:
2038 case Intrinsic::riscv_seg3_store_mask:
2039 case Intrinsic::riscv_seg4_store_mask:
2040 case Intrinsic::riscv_seg5_store_mask:
2041 case Intrinsic::riscv_seg6_store_mask:
2042 case Intrinsic::riscv_seg7_store_mask:
2043 case Intrinsic::riscv_seg8_store_mask:
2044 // Operands are (vec, ..., vec, ptr, mask, vl)
2045 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2046 /*IsStore*/ true,
2047 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2048 case Intrinsic::riscv_sseg2_store_mask:
2049 case Intrinsic::riscv_sseg3_store_mask:
2050 case Intrinsic::riscv_sseg4_store_mask:
2051 case Intrinsic::riscv_sseg5_store_mask:
2052 case Intrinsic::riscv_sseg6_store_mask:
2053 case Intrinsic::riscv_sseg7_store_mask:
2054 case Intrinsic::riscv_sseg8_store_mask:
2055 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
2056 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2057 /*IsStore*/ true,
2058 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2059 case Intrinsic::riscv_vlm:
2060 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
2061 /*IsStore*/ false,
2062 /*IsUnitStrided*/ true,
2063 /*UsePtrVal*/ true);
2064 case Intrinsic::riscv_vle:
2065 case Intrinsic::riscv_vle_mask:
2066 case Intrinsic::riscv_vleff:
2067 case Intrinsic::riscv_vleff_mask:
2068 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2069 /*IsStore*/ false,
2070 /*IsUnitStrided*/ true,
2071 /*UsePtrVal*/ true);
2072 case Intrinsic::riscv_vsm:
2073 case Intrinsic::riscv_vse:
2074 case Intrinsic::riscv_vse_mask:
2075 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2076 /*IsStore*/ true,
2077 /*IsUnitStrided*/ true,
2078 /*UsePtrVal*/ true);
2079 case Intrinsic::riscv_vlse:
2080 case Intrinsic::riscv_vlse_mask:
2081 case Intrinsic::riscv_vloxei:
2082 case Intrinsic::riscv_vloxei_mask:
2083 case Intrinsic::riscv_vluxei:
2084 case Intrinsic::riscv_vluxei_mask:
2085 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2086 /*IsStore*/ false,
2087 /*IsUnitStrided*/ false);
2088 case Intrinsic::riscv_vsse:
2089 case Intrinsic::riscv_vsse_mask:
2090 case Intrinsic::riscv_vsoxei:
2091 case Intrinsic::riscv_vsoxei_mask:
2092 case Intrinsic::riscv_vsuxei:
2093 case Intrinsic::riscv_vsuxei_mask:
2094 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2095 /*IsStore*/ true,
2096 /*IsUnitStrided*/ false);
2097 case Intrinsic::riscv_vlseg2:
2098 case Intrinsic::riscv_vlseg3:
2099 case Intrinsic::riscv_vlseg4:
2100 case Intrinsic::riscv_vlseg5:
2101 case Intrinsic::riscv_vlseg6:
2102 case Intrinsic::riscv_vlseg7:
2103 case Intrinsic::riscv_vlseg8:
2104 case Intrinsic::riscv_vlseg2ff:
2105 case Intrinsic::riscv_vlseg3ff:
2106 case Intrinsic::riscv_vlseg4ff:
2107 case Intrinsic::riscv_vlseg5ff:
2108 case Intrinsic::riscv_vlseg6ff:
2109 case Intrinsic::riscv_vlseg7ff:
2110 case Intrinsic::riscv_vlseg8ff:
2111 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2112 /*IsStore*/ false,
2113 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2114 case Intrinsic::riscv_vlseg2_mask:
2115 case Intrinsic::riscv_vlseg3_mask:
2116 case Intrinsic::riscv_vlseg4_mask:
2117 case Intrinsic::riscv_vlseg5_mask:
2118 case Intrinsic::riscv_vlseg6_mask:
2119 case Intrinsic::riscv_vlseg7_mask:
2120 case Intrinsic::riscv_vlseg8_mask:
2121 case Intrinsic::riscv_vlseg2ff_mask:
2122 case Intrinsic::riscv_vlseg3ff_mask:
2123 case Intrinsic::riscv_vlseg4ff_mask:
2124 case Intrinsic::riscv_vlseg5ff_mask:
2125 case Intrinsic::riscv_vlseg6ff_mask:
2126 case Intrinsic::riscv_vlseg7ff_mask:
2127 case Intrinsic::riscv_vlseg8ff_mask:
2128 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2129 /*IsStore*/ false,
2130 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2131 case Intrinsic::riscv_vlsseg2:
2132 case Intrinsic::riscv_vlsseg3:
2133 case Intrinsic::riscv_vlsseg4:
2134 case Intrinsic::riscv_vlsseg5:
2135 case Intrinsic::riscv_vlsseg6:
2136 case Intrinsic::riscv_vlsseg7:
2137 case Intrinsic::riscv_vlsseg8:
2138 case Intrinsic::riscv_vloxseg2:
2139 case Intrinsic::riscv_vloxseg3:
2140 case Intrinsic::riscv_vloxseg4:
2141 case Intrinsic::riscv_vloxseg5:
2142 case Intrinsic::riscv_vloxseg6:
2143 case Intrinsic::riscv_vloxseg7:
2144 case Intrinsic::riscv_vloxseg8:
2145 case Intrinsic::riscv_vluxseg2:
2146 case Intrinsic::riscv_vluxseg3:
2147 case Intrinsic::riscv_vluxseg4:
2148 case Intrinsic::riscv_vluxseg5:
2149 case Intrinsic::riscv_vluxseg6:
2150 case Intrinsic::riscv_vluxseg7:
2151 case Intrinsic::riscv_vluxseg8:
2152 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2153 /*IsStore*/ false,
2154 /*IsUnitStrided*/ false);
2155 case Intrinsic::riscv_vlsseg2_mask:
2156 case Intrinsic::riscv_vlsseg3_mask:
2157 case Intrinsic::riscv_vlsseg4_mask:
2158 case Intrinsic::riscv_vlsseg5_mask:
2159 case Intrinsic::riscv_vlsseg6_mask:
2160 case Intrinsic::riscv_vlsseg7_mask:
2161 case Intrinsic::riscv_vlsseg8_mask:
2162 case Intrinsic::riscv_vloxseg2_mask:
2163 case Intrinsic::riscv_vloxseg3_mask:
2164 case Intrinsic::riscv_vloxseg4_mask:
2165 case Intrinsic::riscv_vloxseg5_mask:
2166 case Intrinsic::riscv_vloxseg6_mask:
2167 case Intrinsic::riscv_vloxseg7_mask:
2168 case Intrinsic::riscv_vloxseg8_mask:
2169 case Intrinsic::riscv_vluxseg2_mask:
2170 case Intrinsic::riscv_vluxseg3_mask:
2171 case Intrinsic::riscv_vluxseg4_mask:
2172 case Intrinsic::riscv_vluxseg5_mask:
2173 case Intrinsic::riscv_vluxseg6_mask:
2174 case Intrinsic::riscv_vluxseg7_mask:
2175 case Intrinsic::riscv_vluxseg8_mask:
2176 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
2177 /*IsStore*/ false,
2178 /*IsUnitStrided*/ false);
2179 case Intrinsic::riscv_vsseg2:
2180 case Intrinsic::riscv_vsseg3:
2181 case Intrinsic::riscv_vsseg4:
2182 case Intrinsic::riscv_vsseg5:
2183 case Intrinsic::riscv_vsseg6:
2184 case Intrinsic::riscv_vsseg7:
2185 case Intrinsic::riscv_vsseg8:
2186 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2187 /*IsStore*/ true,
2188 /*IsUnitStrided*/ false);
2189 case Intrinsic::riscv_vsseg2_mask:
2190 case Intrinsic::riscv_vsseg3_mask:
2191 case Intrinsic::riscv_vsseg4_mask:
2192 case Intrinsic::riscv_vsseg5_mask:
2193 case Intrinsic::riscv_vsseg6_mask:
2194 case Intrinsic::riscv_vsseg7_mask:
2195 case Intrinsic::riscv_vsseg8_mask:
2196 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2197 /*IsStore*/ true,
2198 /*IsUnitStrided*/ false);
2199 case Intrinsic::riscv_vssseg2:
2200 case Intrinsic::riscv_vssseg3:
2201 case Intrinsic::riscv_vssseg4:
2202 case Intrinsic::riscv_vssseg5:
2203 case Intrinsic::riscv_vssseg6:
2204 case Intrinsic::riscv_vssseg7:
2205 case Intrinsic::riscv_vssseg8:
2206 case Intrinsic::riscv_vsoxseg2:
2207 case Intrinsic::riscv_vsoxseg3:
2208 case Intrinsic::riscv_vsoxseg4:
2209 case Intrinsic::riscv_vsoxseg5:
2210 case Intrinsic::riscv_vsoxseg6:
2211 case Intrinsic::riscv_vsoxseg7:
2212 case Intrinsic::riscv_vsoxseg8:
2213 case Intrinsic::riscv_vsuxseg2:
2214 case Intrinsic::riscv_vsuxseg3:
2215 case Intrinsic::riscv_vsuxseg4:
2216 case Intrinsic::riscv_vsuxseg5:
2217 case Intrinsic::riscv_vsuxseg6:
2218 case Intrinsic::riscv_vsuxseg7:
2219 case Intrinsic::riscv_vsuxseg8:
2220 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2221 /*IsStore*/ true,
2222 /*IsUnitStrided*/ false);
2223 case Intrinsic::riscv_vssseg2_mask:
2224 case Intrinsic::riscv_vssseg3_mask:
2225 case Intrinsic::riscv_vssseg4_mask:
2226 case Intrinsic::riscv_vssseg5_mask:
2227 case Intrinsic::riscv_vssseg6_mask:
2228 case Intrinsic::riscv_vssseg7_mask:
2229 case Intrinsic::riscv_vssseg8_mask:
2230 case Intrinsic::riscv_vsoxseg2_mask:
2231 case Intrinsic::riscv_vsoxseg3_mask:
2232 case Intrinsic::riscv_vsoxseg4_mask:
2233 case Intrinsic::riscv_vsoxseg5_mask:
2234 case Intrinsic::riscv_vsoxseg6_mask:
2235 case Intrinsic::riscv_vsoxseg7_mask:
2236 case Intrinsic::riscv_vsoxseg8_mask:
2237 case Intrinsic::riscv_vsuxseg2_mask:
2238 case Intrinsic::riscv_vsuxseg3_mask:
2239 case Intrinsic::riscv_vsuxseg4_mask:
2240 case Intrinsic::riscv_vsuxseg5_mask:
2241 case Intrinsic::riscv_vsuxseg6_mask:
2242 case Intrinsic::riscv_vsuxseg7_mask:
2243 case Intrinsic::riscv_vsuxseg8_mask:
2244 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2245 /*IsStore*/ true,
2246 /*IsUnitStrided*/ false);
2247 case Intrinsic::riscv_sf_vlte8:
2248 case Intrinsic::riscv_sf_vlte16:
2249 case Intrinsic::riscv_sf_vlte32:
2250 case Intrinsic::riscv_sf_vlte64:
2251 Info.opc = ISD::INTRINSIC_VOID;
2252 Info.ptrVal = I.getArgOperand(1);
2253 switch (Intrinsic) {
2254 case Intrinsic::riscv_sf_vlte8:
2255 Info.memVT = MVT::i8;
2256 Info.align = Align(1);
2257 break;
2258 case Intrinsic::riscv_sf_vlte16:
2259 Info.memVT = MVT::i16;
2260 Info.align = Align(2);
2261 break;
2262 case Intrinsic::riscv_sf_vlte32:
2263 Info.memVT = MVT::i32;
2264 Info.align = Align(4);
2265 break;
2266 case Intrinsic::riscv_sf_vlte64:
2267 Info.memVT = MVT::i64;
2268 Info.align = Align(8);
2269 break;
2270 }
2271 Info.size = MemoryLocation::UnknownSize;
2272 Info.flags |= MachineMemOperand::MOLoad;
2273 return true;
2274 case Intrinsic::riscv_sf_vste8:
2275 case Intrinsic::riscv_sf_vste16:
2276 case Intrinsic::riscv_sf_vste32:
2277 case Intrinsic::riscv_sf_vste64:
2278 Info.opc = ISD::INTRINSIC_VOID;
2279 Info.ptrVal = I.getArgOperand(1);
2280 switch (Intrinsic) {
2281 case Intrinsic::riscv_sf_vste8:
2282 Info.memVT = MVT::i8;
2283 Info.align = Align(1);
2284 break;
2285 case Intrinsic::riscv_sf_vste16:
2286 Info.memVT = MVT::i16;
2287 Info.align = Align(2);
2288 break;
2289 case Intrinsic::riscv_sf_vste32:
2290 Info.memVT = MVT::i32;
2291 Info.align = Align(4);
2292 break;
2293 case Intrinsic::riscv_sf_vste64:
2294 Info.memVT = MVT::i64;
2295 Info.align = Align(8);
2296 break;
2297 }
2298 Info.size = MemoryLocation::UnknownSize;
2299 Info.flags |= MachineMemOperand::MOStore;
2300 return true;
2301 }
2302}
2303
2305 const AddrMode &AM, Type *Ty,
2306 unsigned AS,
2307 Instruction *I) const {
2308 // No global is ever allowed as a base.
2309 if (AM.BaseGV)
2310 return false;
2311
2312 // None of our addressing modes allows a scalable offset
2313 if (AM.ScalableOffset)
2314 return false;
2315
2316 // RVV instructions only support register addressing.
2317 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2318 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2319
2320 // Require a 12-bit signed offset.
2321 if (!isInt<12>(AM.BaseOffs))
2322 return false;
2323
2324 switch (AM.Scale) {
2325 case 0: // "r+i" or just "i", depending on HasBaseReg.
2326 break;
2327 case 1:
2328 if (!AM.HasBaseReg) // allow "r+i".
2329 break;
2330 return false; // disallow "r+r" or "r+r+i".
2331 default:
2332 return false;
2333 }
2334
2335 return true;
2336}
2337
2339 return isInt<12>(Imm);
2340}
2341
2343 return isInt<12>(Imm);
2344}
2345
2346// On RV32, 64-bit integers are split into their high and low parts and held
2347// in two different registers, so the trunc is free since the low register can
2348// just be used.
2349// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2350// isTruncateFree?
2352 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2353 return false;
2354 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2355 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2356 return (SrcBits == 64 && DestBits == 32);
2357}
2358
2360 // We consider i64->i32 free on RV64 since we have good selection of W
2361 // instructions that make promoting operations back to i64 free in many cases.
2362 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2363 !DstVT.isInteger())
2364 return false;
2365 unsigned SrcBits = SrcVT.getSizeInBits();
2366 unsigned DestBits = DstVT.getSizeInBits();
2367 return (SrcBits == 64 && DestBits == 32);
2368}
2369
2371 EVT SrcVT = Val.getValueType();
2372 // free truncate from vnsrl and vnsra
2373 if (Subtarget.hasVInstructions() &&
2374 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2375 SrcVT.isVector() && VT2.isVector()) {
2376 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2377 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2378 if (SrcBits == DestBits * 2) {
2379 return true;
2380 }
2381 }
2382 return TargetLowering::isTruncateFree(Val, VT2);
2383}
2384
2386 // Zexts are free if they can be combined with a load.
2387 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2388 // poorly with type legalization of compares preferring sext.
2389 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2390 EVT MemVT = LD->getMemoryVT();
2391 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2392 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2393 LD->getExtensionType() == ISD::ZEXTLOAD))
2394 return true;
2395 }
2396
2397 return TargetLowering::isZExtFree(Val, VT2);
2398}
2399
2401 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2402}
2403
2405 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2406}
2407
2409 return Subtarget.hasCTZLike();
2410}
2411
2413 return Subtarget.hasCLZLike();
2414}
2415
2417 const Instruction &AndI) const {
2418 // We expect to be able to match a bit extraction instruction if the Zbs
2419 // extension is supported and the mask is a power of two. However, we
2420 // conservatively return false if the mask would fit in an ANDI instruction,
2421 // on the basis that it's possible the sinking+duplication of the AND in
2422 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2423 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2424 if (!Subtarget.hasBEXTILike())
2425 return false;
2427 if (!Mask)
2428 return false;
2429 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2430}
2431
2433 EVT VT = Y.getValueType();
2434
2435 if (VT.isVector())
2436 return false;
2437
2438 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2439 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2440}
2441
2443 EVT VT = Y.getValueType();
2444
2445 if (!VT.isVector())
2446 return hasAndNotCompare(Y);
2447
2448 return Subtarget.hasStdExtZvkb();
2449}
2450
  // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  // XTheadBs provides th.tst (similar to bexti), if Y is a constant
  if (Subtarget.hasVendorXTHeadBs())
    return C != nullptr;
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  // A bit position <= 10 keeps the mask (1 << Y) inside ANDI's 12-bit
  // signed immediate range.
  return C && C->getAPIntValue().ule(10);
}
2462
    unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
    SDValue Y) const {
  // Only fold identity constants into vector selects.
  if (SelectOpcode != ISD::VSELECT)
    return false;

  // Only enable for rvv.
  if (!VT.isVector() || !Subtarget.hasVInstructions())
    return false;

  // Fixed-length vectors must be legal so they lower through RVV containers.
  if (VT.isFixedLengthVector() && !isTypeLegal(VT))
    return false;

  return true;
}
2478
    Type *Ty) const {
  assert(Ty->isIntegerTy());

  // Immediates wider than XLEN cannot be materialized in a GPR.
  unsigned BitSize = Ty->getIntegerBitWidth();
  if (BitSize > Subtarget.getXLen())
    return false;

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();
  if (isInt<32>(Val))
    return true;

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // pool.
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.enableUnalignedScalarMem())
    return true;

  // Prefer to keep the load if it would require many instructions.
  // This uses the same threshold we use for constant pools but doesn't
  // check useConstantPoolForLargeInts.
  // TODO: Should we keep the load only when we're definitely going to emit a
  // constant pool?

  // Convert only if the materialization sequence fits the build-ints budget.
  return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}
2508
    unsigned OldShiftOpcode, unsigned NewShiftOpcode,
    SelectionDAG &DAG) const {
  // One interesting pattern that we'd want to form is 'bit extract':
  //   ((1 >> Y) & 1) ==/!= 0
  // But we also need to be careful not to try to reverse that fold.

  // Is this '((1 >> Y) & 1)'?
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
    return false; // Keep the 'bit extract' pattern.

  // Will this be '((1 >> Y) & 1)' after the transform?
  if (NewShiftOpcode == ISD::SRL && CC->isOne())
    return true; // Do form the 'bit extract' pattern.

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So only do the transform if X is not a constant. This matches the default
  // implementation of this function.
  return !XC;
}
2532
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  // Permit a vector binary operation can be converted to scalar binary
  // operation which is custom lowered with illegal type.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
         isOperationCustom(Opc, ScalarVT);
}
2554
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  // Hence: never fold here.
  return false;
}
2563
// Returns 0-31 if the fli instruction is available for the type and this is a
// legal FP immediate for the type. Returns -1 otherwise.
  // fli is a Zfa instruction; without Zfa nothing is legal here.
  if (!Subtarget.hasStdExtZfa())
    return -1;

  bool IsSupportedVT = false;
  if (VT == MVT::f16) {
    // fli.h additionally needs half-precision support (Zfh or Zvfh).
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
  } else if (VT == MVT::f32) {
    IsSupportedVT = true;
  } else if (VT == MVT::f64) {
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;
  }

  if (!IsSupportedVT)
    return -1;

  // Look the value up in the fli immediate table (index 0-31, or -1).
  return RISCVLoadFPImm::getLoadFPImm(Imm);
}
2585
    bool ForCodeSize) const {
  // The FP type itself must be supported by some scalar FP extension.
  bool IsLegalVT = false;
  if (VT == MVT::f16)
    IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
  else if (VT == MVT::f32)
    IsLegalVT = Subtarget.hasStdExtFOrZfinx();
  else if (VT == MVT::f64)
    IsLegalVT = Subtarget.hasStdExtDOrZdinx();
  else if (VT == MVT::bf16)
    IsLegalVT = Subtarget.hasStdExtZfbfmin();

  if (!IsLegalVT)
    return false;

  // A single fli covers the Zfa immediate table.
  if (getLegalZfaFPImm(Imm, VT) >= 0)
    return true;

  // Some constants can be produced by fli+fneg.
  if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
    return true;

  // Cannot create a 64 bit floating-point immediate value for rv32.
  if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
    // td can handle +0.0 or -0.0 already.
    // -0.0 can be created by fmv + fneg.
    return Imm.isZero();
  }

  // Special case: fmv + fneg
  if (Imm.isNegZero())
    return true;

  // Building an integer and then converting requires a fmv at the end of
  // the integer sequence. The fmv is not required for Zfinx.
  const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
  const int Cost =
      FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
                                           Subtarget.getXLen(), Subtarget);
  // Legal iff materialization fits the fpimm-cost budget (command-line knob).
  return Cost <= FPImmCost;
}
2627
// TODO: This is very conservative.
    unsigned Index) const {
    return false;

  // Extracts from index 0 are just subreg extracts.
  if (Index == 0)
    return true;

  // Only support extracting a fixed from a fixed vector for now.
  if (ResVT.isScalableVector() || SrcVT.isScalableVector())
    return false;

  EVT EltVT = ResVT.getVectorElementType();
  assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");

  // The smallest type we can slide is i8.
  // TODO: We can extract index 0 from a mask vector without a slide.
  if (EltVT == MVT::i1)
    return false;

  unsigned ResElts = ResVT.getVectorNumElements();
  unsigned SrcElts = SrcVT.getVectorNumElements();

  unsigned MinVLen = Subtarget.getRealMinVLen();
  unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();

  // If we're extracting only data from the first VLEN bits of the source
  // then we can always do this with an m1 vslidedown.vx. Restricting the
  // Index ensures we can use a vslidedown.vi.
  // TODO: We can generalize this when the exact VLEN is known.
  if (Index + ResElts <= MinVLMAX && Index < 31)
    return true;

  // Conservatively only handle extracting half of a vector.
  // TODO: We can do arbitrary slidedowns, but for now only support extracting
  // the upper half of a vector until we have more test coverage.
  // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
  // a cheap extract. However, this case is important in practice for
  // shuffled extracts of longer vectors. How should we resolve this?
  return (ResElts * 2) == SrcElts && Index == ResElts;
}
2671
    CallingConv::ID CC,
    EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
      !Subtarget.hasStdExtZfhminOrZhinxmin())
    return MVT::f32;

  // Everything else follows the generic rules.
  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
2683
unsigned
    std::optional<MVT> RegisterVT) const {
  // Pair inline assembly operand: an XLEN-pair-sized integer (i128 on RV64,
  // i64 on RV32) requested as Untyped occupies a single (paired) register.
  if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
      *RegisterVT == MVT::Untyped)
    return 1;

  return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
}
2694
    CallingConv::ID CC,
    EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  // In that case the f16 fits in a single f32 register.
  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
      !Subtarget.hasStdExtZfhminOrZhinxmin())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
2706
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
    ISD::CondCode &CC, SelectionDAG &DAG,
    const RISCVSubtarget &Subtarget) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1)) &&
      // XAndesPerf supports branch on test bit.
      !Subtarget.hasVendorXAndesPerf()) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        // Single bit: move it to the sign bit and test the sign.
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      } else {
        // Low-bit mask: shift out the bits above the mask.
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
      }

      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default: break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
          C != INT64_MAX && isInt<5>(C + 1)) {
        // We have a conditional move instruction for SETGE but not SETGT.
        // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
        RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
        // We have a branch immediate instruction for SETGE but not SETGT.
        // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
        RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETUGT:
      if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
          C != INT64_MAX && isUInt<5>(C + 1)) {
        // We have a conditional move instruction for SETUGE but not SETUGT.
        // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
        RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
        CC = ISD::SETUGE;
        return;
      }
      if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
        // We have a branch immediate instruction for SETUGE but not SETUGT.
        // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
        // immediate.
        RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
        CC = ISD::SETUGE;
        return;
      }
      break;
    }
  }

  // Branches natively support LT/GE-style conditions; canonicalize the
  // GT/LE variants by swapping the operands.
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    std::swap(LHS, RHS);
    break;
  }
}
2808
  // Vector tuple types encode their per-field LMUL in the simple-type range.
  if (VT.isRISCVVectorTuple()) {
    if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
        VT.SimpleTy <= MVT::riscv_nxv1i8x8)
      return RISCVVType::LMUL_F8;
    if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
        VT.SimpleTy <= MVT::riscv_nxv2i8x8)
      return RISCVVType::LMUL_F4;
    if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
        VT.SimpleTy <= MVT::riscv_nxv4i8x8)
      return RISCVVType::LMUL_F2;
    if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
        VT.SimpleTy <= MVT::riscv_nxv8i8x8)
      return RISCVVType::LMUL_1;
    if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
        VT.SimpleTy <= MVT::riscv_nxv16i8x4)
      return RISCVVType::LMUL_2;
    if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
      return RISCVVType::LMUL_4;
    llvm_unreachable("Invalid vector tuple type LMUL.");
  }

  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  // Scale i1 vectors by 8 so masks map onto the byte-based buckets below.
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVVType::LMUL_F8;
  case 16:
    return RISCVVType::LMUL_F4;
  case 32:
    return RISCVVType::LMUL_F2;
  case 64:
    return RISCVVType::LMUL_1;
  case 128:
    return RISCVVType::LMUL_2;
  case 256:
    return RISCVVType::LMUL_4;
  case 512:
    return RISCVVType::LMUL_8;
  }
}
2855
  // Map an LMUL to the register class of a whole register group of that size.
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVVType::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVVType::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVVType::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVVType::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}
2873
2874unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2875 RISCVVType::VLMUL LMUL = getLMUL(VT);
2876 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2877 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2878 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2879 "Unexpected subreg numbering");
2880 return RISCV::sub_vrm1_0 + Index;
2881 }
2882 if (LMUL == RISCVVType::LMUL_2) {
2883 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2884 "Unexpected subreg numbering");
2885 return RISCV::sub_vrm2_0 + Index;
2886 }
2887 if (LMUL == RISCVVType::LMUL_4) {
2888 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2889 "Unexpected subreg numbering");
2890 return RISCV::sub_vrm4_0 + Index;
2891 }
2892 llvm_unreachable("Invalid vector type.");
2893}
2894
  // Vector tuple types select a VRN<NF>M<RegsPerField> class based on the
  // number of fields and registers each field occupies.
  if (VT.isRISCVVectorTuple()) {
    unsigned NF = VT.getRISCVVectorTupleNumFields();
    unsigned RegsPerField =
        std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
                         (NF * RISCV::RVVBitsPerBlock));
    switch (RegsPerField) {
    case 1:
      if (NF == 2)
        return RISCV::VRN2M1RegClassID;
      if (NF == 3)
        return RISCV::VRN3M1RegClassID;
      if (NF == 4)
        return RISCV::VRN4M1RegClassID;
      if (NF == 5)
        return RISCV::VRN5M1RegClassID;
      if (NF == 6)
        return RISCV::VRN6M1RegClassID;
      if (NF == 7)
        return RISCV::VRN7M1RegClassID;
      if (NF == 8)
        return RISCV::VRN8M1RegClassID;
      break;
    case 2:
      if (NF == 2)
        return RISCV::VRN2M2RegClassID;
      if (NF == 3)
        return RISCV::VRN3M2RegClassID;
      if (NF == 4)
        return RISCV::VRN4M2RegClassID;
      break;
    case 4:
      assert(NF == 2);
      return RISCV::VRN2M4RegClassID;
    default:
      break;
    }
    llvm_unreachable("Invalid vector tuple type RegClass.");
  }

  // Mask vectors always fit in a single VR.
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}
2939
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  // The loop below relies on register-class IDs growing with LMUL.
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);

  // If VecVT is a vector tuple type, either it's the tuple type with same
  // RegClass with SubVecVT or SubVecVT is actually a subvector of the VecVT.
  if (VecVT.isRISCVVectorTuple()) {
    if (VecRegClassID == SubRegClassID)
      return {RISCV::NoSubRegister, 0};

    assert(SubVecVT.isScalableVector() &&
           "Only allow scalable vector subvector.");
    assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
           "Invalid vector tuple insert/extract for vector and subvector with "
           "different LMUL.");
    return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
  }

  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      // Descending into the high half consumes half a group's worth of index.
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}
2990
2991// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2992// stores for those types.
2993bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2994 return !Subtarget.useRVVForFixedLengthVectors() ||
2995 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2996}
2997
  // Only simple MVTs can be RVV element types.
  if (!ScalarTy.isSimple())
    return false;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::iPTR:
    // Pointers are i64 on RV64 (needs I64 vector support), i32 otherwise.
    return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return Subtarget.hasVInstructions();
  case MVT::i64:
    return Subtarget.hasVInstructionsI64();
  case MVT::f16:
    return Subtarget.hasVInstructionsF16Minimal();
  case MVT::bf16:
    return Subtarget.hasVInstructionsBF16Minimal();
  case MVT::f32:
    return Subtarget.hasVInstructionsF32();
  case MVT::f64:
    return Subtarget.hasVInstructionsF64();
  default:
    return false;
  }
}
3022
3023
  // Threshold comes from the -riscv-lower-fp-repeated-divisors option:
  // minimum repetitions of a divisor before converting to multiply-by-
  // reciprocal.
  return NumRepeatedDivisors;
}
3027
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");
  // With a chain, operand 0 is the chain and the intrinsic ID moves to 1.
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  // Not a VL-carrying RVV intrinsic.
  if (!II)
    return SDValue();
  // VLOperand is relative to the intrinsic's own arguments; skip the ID
  // (and the chain, when present).
  return Op.getOperand(II->VLOperand + 1 + HasChain);
}
3040
    const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with a consistent maximum fixed size
  // across all supported vector element types to avoid legalization issues.
  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
  // fixed-length vector type we support is 1024 bytes.
  if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
    return false;

  unsigned MinVLen = Subtarget.getRealMinVLen();

  MVT EltVT = VT.getVectorElementType();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (EltVT.SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    // Dividing MinVLen by 8 makes the LMul computation below treat each i1
    // element as a byte's worth of register capacity.
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    break;
  case MVT::i64:
    if (!Subtarget.hasVInstructionsI64())
      return false;
    break;
  case MVT::f16:
    if (!Subtarget.hasVInstructionsF16Minimal())
      return false;
    break;
  case MVT::bf16:
    if (!Subtarget.hasVInstructionsBF16Minimal())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasVInstructionsF32())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasVInstructionsF64())
      return false;
    break;
  }

  // Reject elements larger than ELEN.
  if (EltVT.getSizeInBits() > Subtarget.getELen())
    return false;

  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}
3111
// Member-facing wrapper over the file-local implementation above.
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
3115
// Return the largest legal scalable vector type that matches VT's element
// type.
    const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are setup.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getRealMinVLen();
  unsigned MaxELen = Subtarget.getELen();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::bf16:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
    // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
    // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
    unsigned NumElts =
    // Clamp to at least the smallest supported fractional-LMUL element count.
    NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
    return MVT::getScalableVectorVT(EltVT, NumElts);
  }
  }
}
3151
    const RISCVSubtarget &Subtarget) {
  // Convenience overload; forwards to the TLI-taking helper above.
      Subtarget);
}
3157
  // Member entry point: dispatch to the file-local helper with *this.
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
3161
// Grow V to consume an entire RVV register.
    const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  // Insert the fixed vector at element 0 of an undef scalable container.
  return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
}
3172
// Shrink V so it's just big enough to maintain a VT's worth of data.
    const RISCVSubtarget &Subtarget) {
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  // Take the low VT-sized subvector of the scalable container.
  return DAG.getExtractSubvector(DL, VT, V, 0);
}
3183
/// Return the mask type suitable for masking the provided vector type: an i1
/// vector with the same (possibly scalable) element count.
static MVT getMaskTypeFor(MVT VecVT) {
  assert(VecVT.isVector());
  return MVT::getVectorVT(MVT::i1, EC);
}
3192
3193/// Creates an all ones mask suitable for masking a vector of type VecTy with
3194/// vector length VL. .
3195static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
3196 SelectionDAG &DAG) {
3197 MVT MaskVT = getMaskTypeFor(VecVT);
3198 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3199}
3200
3201static std::pair<SDValue, SDValue>
    const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
  // X0 as the AVL operand requests VLMAX per the V extension's vsetvli rules.
  SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
  SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
  return {Mask, VL};
}
3209
3210static std::pair<SDValue, SDValue>
3211getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
3212 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
3213 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3214 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
3215 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
3216 return {Mask, VL};
3217}
3218
3219// Gets the two common "VL" operands: an all-ones mask and the vector length.
3220// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
3221// the vector type that the fixed-length vector is contained in. Otherwise if
3222// VecVT is scalable, then ContainerVT should be the same as VecVT.
3223static std::pair<SDValue, SDValue>
3224getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
3225 const RISCVSubtarget &Subtarget) {
3226 if (VecVT.isFixedLengthVector())
3227 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
3228 Subtarget);
3229 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3230 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
3231}
3232
    SelectionDAG &DAG) const {
  assert(VecVT.isScalableVector() && "Expected scalable vector");
  // VLMAX for a scalable type is just its element count, as an XLEN value.
  return DAG.getElementCount(DL, Subtarget.getXLenVT(),
                             VecVT.getVectorElementCount());
}
3239
// Return {MinVLMAX, MaxVLMAX} for VecVT given the subtarget's VLEN range.
std::pair<unsigned, unsigned>
    const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector() && "Expected scalable vector");

  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();

  // Upper bound comes from the largest VLEN the subtarget may implement.
  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

  // Lower bound comes from the guaranteed minimum VLEN.
  unsigned VectorBitsMin = Subtarget.getRealMinVLen();
  unsigned MinVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);

  return std::make_pair(MinVLMAX, MaxVLMAX);
}
3258
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
    EVT VT, unsigned DefinedValues) const {
  // Unconditionally decline; see the rationale above.
  return false;
}
3271
  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
  // implementation-defined.
  if (!VT.isVector())
  unsigned DLenFactor = Subtarget.getDLenFactor();
  unsigned Cost;
  if (VT.isScalableVector()) {
    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) =
    if (Fractional)
      // Fractional LMUL below the DLEN ratio still costs whole DLEN beats.
      Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
    else
      Cost = (LMul * DLenFactor);
  } else {
    // Fixed vectors: number of DLEN-wide beats needed to cover the type.
    Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
  }
  return Cost;
}
3293
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// may be quadratic in the number of vregs implied by LMUL, and is assumed to
/// be by default. VRGatherCostModel reflects available options. Note that
/// operand (index and possibly mask) are handled separately.
  auto LMULCost = getLMULCost(VT);
  bool Log2CostModel =
      Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
  if (Log2CostModel && LMULCost.isValid()) {
    unsigned Log = Log2_64(LMULCost.getValue());
    // N*log2(N) model (only when log2 is non-zero, i.e. cost > 1).
    if (Log > 0)
      return LMULCost * Log;
  }
  // Default model: quadratic in the LMUL cost.
  return LMULCost * LMULCost;
}
3310
3311/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3312/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3313/// or may track the vrgather.vv cost. It is implementation-dependent.
3317
3318/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3319/// for the type VT. (This does not cover the vslide1up or vslide1down
3320/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3321/// or may track the vrgather.vv cost. It is implementation-dependent.
3325
3326/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3327/// for the type VT. (This does not cover the vslide1up or vslide1down
3328/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3329/// or may track the vrgather.vv cost. It is implementation-dependent.
3333
    const RISCVSubtarget &Subtarget) {
  // Promote f16 (when Zfh/Zhinx is unavailable) and bf16 operations: perform
  // the op in f32 and round the result back to the original type.
  // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
  // bf16 conversions are always promoted to f32.
  if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
      Op.getValueType() == MVT::bf16) {
    bool IsStrict = Op->isStrictFPOpcode();

    SDLoc DL(Op);
    if (IsStrict) {
      // Strict FP ops thread the chain through the promoted op and the round.
      SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
                                {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
                         {Op.getValueType(), MVT::Other},
                         {Val.getValue(1), Val.getValue(0),
                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
    }
    return DAG.getNode(
        ISD::FP_ROUND, DL, Op.getValueType(),
        DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
        DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
  }

  // Other operations are legal.
  return Op;
}
3360
3362 const RISCVSubtarget &Subtarget) {
3363 // RISC-V FP-to-int conversions saturate to the destination register size, but
3364 // don't produce 0 for nan. We can use a conversion instruction and fix the
3365 // nan case with a compare and a select.
3366 SDValue Src = Op.getOperand(0);
3367
3368 MVT DstVT = Op.getSimpleValueType();
3369 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3370
3371 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3372
3373 if (!DstVT.isVector()) {
3374 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3375 // the result.
3376 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3377 Src.getValueType() == MVT::bf16) {
3378 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3379 }
3380
3381 unsigned Opc;
3382 if (SatVT == DstVT)
3383 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3384 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3385 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3386 else
3387 return SDValue();
3388 // FIXME: Support other SatVTs by clamping before or after the conversion.
3389
3390 SDLoc DL(Op);
3391 SDValue FpToInt = DAG.getNode(
3392 Opc, DL, DstVT, Src,
3394
3395 if (Opc == RISCVISD::FCVT_WU_RV64)
3396 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3397
3398 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3399 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3401 }
3402
3403 // Vectors.
3404
3405 MVT DstEltVT = DstVT.getVectorElementType();
3406 MVT SrcVT = Src.getSimpleValueType();
3407 MVT SrcEltVT = SrcVT.getVectorElementType();
3408 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3409 unsigned DstEltSize = DstEltVT.getSizeInBits();
3410
3411 // Only handle saturating to the destination type.
3412 if (SatVT != DstEltVT)
3413 return SDValue();
3414
3415 MVT DstContainerVT = DstVT;
3416 MVT SrcContainerVT = SrcVT;
3417 if (DstVT.isFixedLengthVector()) {
3418 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3419 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3420 assert(DstContainerVT.getVectorElementCount() ==
3421 SrcContainerVT.getVectorElementCount() &&
3422 "Expected same element count");
3423 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3424 }
3425
3426 SDLoc DL(Op);
3427
3428 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3429
3430 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3431 {Src, Src, DAG.getCondCode(ISD::SETNE),
3432 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3433
3434 // Need to widen by more than 1 step, promote the FP type, then do a widening
3435 // convert.
3436 if (DstEltSize > (2 * SrcEltSize)) {
3437 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3438 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3439 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3440 }
3441
3442 MVT CvtContainerVT = DstContainerVT;
3443 MVT CvtEltVT = DstEltVT;
3444 if (SrcEltSize > (2 * DstEltSize)) {
3445 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3446 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3447 }
3448
3449 unsigned RVVOpc =
3450 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3451 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3452
3453 while (CvtContainerVT != DstContainerVT) {
3454 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3455 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3456 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3457 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3458 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3459 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3460 }
3461
3462 SDValue SplatZero = DAG.getNode(
3463 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3464 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3465 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3466 Res, DAG.getUNDEF(DstContainerVT), VL);
3467
3468 if (DstVT.isFixedLengthVector())
3469 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3470
3471 return Res;
3472}
3473
3475 const RISCVSubtarget &Subtarget) {
3476 bool IsStrict = Op->isStrictFPOpcode();
3477 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3478
3479 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3480 // bf16 conversions are always promoted to f32.
3481 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3482 SrcVal.getValueType() == MVT::bf16) {
3483 SDLoc DL(Op);
3484 if (IsStrict) {
3485 SDValue Ext =
3486 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3487 {Op.getOperand(0), SrcVal});
3488 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3489 {Ext.getValue(1), Ext.getValue(0)});
3490 }
3491 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3492 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3493 }
3494
3495 // Other operations are legal.
3496 return Op;
3497}
3498
3500 switch (Opc) {
3501 case ISD::FROUNDEVEN:
3503 case ISD::VP_FROUNDEVEN:
3504 return RISCVFPRndMode::RNE;
3505 case ISD::FTRUNC:
3506 case ISD::STRICT_FTRUNC:
3507 case ISD::VP_FROUNDTOZERO:
3508 return RISCVFPRndMode::RTZ;
3509 case ISD::FFLOOR:
3510 case ISD::STRICT_FFLOOR:
3511 case ISD::VP_FFLOOR:
3512 return RISCVFPRndMode::RDN;
3513 case ISD::FCEIL:
3514 case ISD::STRICT_FCEIL:
3515 case ISD::VP_FCEIL:
3516 return RISCVFPRndMode::RUP;
3517 case ISD::FROUND:
3518 case ISD::LROUND:
3519 case ISD::LLROUND:
3520 case ISD::STRICT_FROUND:
3521 case ISD::STRICT_LROUND:
3523 case ISD::VP_FROUND:
3524 return RISCVFPRndMode::RMM;
3525 case ISD::FRINT:
3526 case ISD::LRINT:
3527 case ISD::LLRINT:
3528 case ISD::STRICT_FRINT:
3529 case ISD::STRICT_LRINT:
3530 case ISD::STRICT_LLRINT:
3531 case ISD::VP_FRINT:
3532 case ISD::VP_LRINT:
3533 case ISD::VP_LLRINT:
3534 return RISCVFPRndMode::DYN;
3535 }
3536
3538}
3539
3540// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3541// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3542// the integer domain and back. Taking care to avoid converting values that are
3543// nan or already correct.
3544static SDValue
3546 const RISCVSubtarget &Subtarget) {
3547 MVT VT = Op.getSimpleValueType();
3548 assert(VT.isVector() && "Unexpected type");
3549
3550 SDLoc DL(Op);
3551
3552 SDValue Src = Op.getOperand(0);
3553
3554 // Freeze the source since we are increasing the number of uses.
3555 Src = DAG.getFreeze(Src);
3556
3557 MVT ContainerVT = VT;
3558 if (VT.isFixedLengthVector()) {
3559 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3560 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3561 }
3562
3563 SDValue Mask, VL;
3564 if (Op->isVPOpcode()) {
3565 Mask = Op.getOperand(1);
3566 if (VT.isFixedLengthVector())
3567 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3568 Subtarget);
3569 VL = Op.getOperand(2);
3570 } else {
3571 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3572 }
3573
3574 // We do the conversion on the absolute value and fix the sign at the end.
3575 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3576
3577 // Determine the largest integer that can be represented exactly. This and
3578 // values larger than it don't have any fractional bits so don't need to
3579 // be converted.
3580 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3581 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3582 APFloat MaxVal = APFloat(FltSem);
3583 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3584 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3585 SDValue MaxValNode =
3586 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3587 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3588 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3589
3590 // If abs(Src) was larger than MaxVal or nan, keep it.
3591 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3592 Mask =
3593 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3594 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3595 Mask, Mask, VL});
3596
3597 // Truncate to integer and convert back to FP.
3598 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3599 MVT XLenVT = Subtarget.getXLenVT();
3600 SDValue Truncated;
3601
3602 switch (Op.getOpcode()) {
3603 default:
3604 llvm_unreachable("Unexpected opcode");
3605 case ISD::FRINT:
3606 case ISD::VP_FRINT:
3607 case ISD::FCEIL:
3608 case ISD::VP_FCEIL:
3609 case ISD::FFLOOR:
3610 case ISD::VP_FFLOOR:
3611 case ISD::FROUND:
3612 case ISD::FROUNDEVEN:
3613 case ISD::VP_FROUND:
3614 case ISD::VP_FROUNDEVEN:
3615 case ISD::VP_FROUNDTOZERO: {
3618 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3619 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3620 break;
3621 }
3622 case ISD::FTRUNC:
3623 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3624 Mask, VL);
3625 break;
3626 case ISD::FNEARBYINT:
3627 case ISD::VP_FNEARBYINT:
3628 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3629 Mask, VL);
3630 break;
3631 }
3632
3633 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3634 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3635 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3636 Mask, VL);
3637
3638 // Restore the original sign so that -0.0 is preserved.
3639 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3640 Src, Src, Mask, VL);
3641
3642 if (!VT.isFixedLengthVector())
3643 return Truncated;
3644
3645 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3646}
3647
3648// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3649// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3650// qNan and converting the new source to integer and back to FP.
3651static SDValue
3653 const RISCVSubtarget &Subtarget) {
3654 SDLoc DL(Op);
3655 MVT VT = Op.getSimpleValueType();
3656 SDValue Chain = Op.getOperand(0);
3657 SDValue Src = Op.getOperand(1);
3658
3659 MVT ContainerVT = VT;
3660 if (VT.isFixedLengthVector()) {
3661 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3662 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3663 }
3664
3665 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3666
3667 // Freeze the source since we are increasing the number of uses.
3668 Src = DAG.getFreeze(Src);
3669
3670 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3671 MVT MaskVT = Mask.getSimpleValueType();
3672 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3673 DAG.getVTList(MaskVT, MVT::Other),
3674 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3675 DAG.getUNDEF(MaskVT), Mask, VL});
3676 Chain = Unorder.getValue(1);
3677 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3678 DAG.getVTList(ContainerVT, MVT::Other),
3679 {Chain, Src, Src, Src, Unorder, VL});
3680 Chain = Src.getValue(1);
3681
3682 // We do the conversion on the absolute value and fix the sign at the end.
3683 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3684
3685 // Determine the largest integer that can be represented exactly. This and
3686 // values larger than it don't have any fractional bits so don't need to
3687 // be converted.
3688 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3689 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3690 APFloat MaxVal = APFloat(FltSem);
3691 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3692 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3693 SDValue MaxValNode =
3694 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3695 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3696 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3697
3698 // If abs(Src) was larger than MaxVal or nan, keep it.
3699 Mask = DAG.getNode(
3700 RISCVISD::SETCC_VL, DL, MaskVT,
3701 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3702
3703 // Truncate to integer and convert back to FP.
3704 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3705 MVT XLenVT = Subtarget.getXLenVT();
3706 SDValue Truncated;
3707
3708 switch (Op.getOpcode()) {
3709 default:
3710 llvm_unreachable("Unexpected opcode");
3711 case ISD::STRICT_FCEIL:
3712 case ISD::STRICT_FFLOOR:
3713 case ISD::STRICT_FROUND:
3717 Truncated = DAG.getNode(
3718 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3719 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3720 break;
3721 }
3722 case ISD::STRICT_FTRUNC:
3723 Truncated =
3724 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3725 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3726 break;
3728 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3729 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3730 Mask, VL);
3731 break;
3732 }
3733 Chain = Truncated.getValue(1);
3734
3735 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3736 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3737 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3738 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3739 Truncated, Mask, VL);
3740 Chain = Truncated.getValue(1);
3741 }
3742
3743 // Restore the original sign so that -0.0 is preserved.
3744 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3745 Src, Src, Mask, VL);
3746
3747 if (VT.isFixedLengthVector())
3748 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3749 return DAG.getMergeValues({Truncated, Chain}, DL);
3750}
3751
3752static SDValue
3754 const RISCVSubtarget &Subtarget) {
3755 MVT VT = Op.getSimpleValueType();
3756 if (VT.isVector())
3757 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3758
3759 if (DAG.shouldOptForSize())
3760 return SDValue();
3761
3762 SDLoc DL(Op);
3763 SDValue Src = Op.getOperand(0);
3764
3765 // Create an integer the size of the mantissa with the MSB set. This and all
3766 // values larger than it don't have any fractional bits so don't need to be
3767 // converted.
3768 const fltSemantics &FltSem = VT.getFltSemantics();
3769 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3770 APFloat MaxVal = APFloat(FltSem);
3771 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3772 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3773 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3774
3776 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3777 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3778}
3779
3780// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3782 const RISCVSubtarget &Subtarget) {
3783 SDLoc DL(Op);
3784 MVT DstVT = Op.getSimpleValueType();
3785 SDValue Src = Op.getOperand(0);
3786 MVT SrcVT = Src.getSimpleValueType();
3787 assert(SrcVT.isVector() && DstVT.isVector() &&
3788 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3789 "Unexpected type");
3790
3791 MVT DstContainerVT = DstVT;
3792 MVT SrcContainerVT = SrcVT;
3793
3794 if (DstVT.isFixedLengthVector()) {
3795 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3796 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3797 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3798 }
3799
3800 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3801
3802 // [b]f16 -> f32
3803 MVT SrcElemType = SrcVT.getVectorElementType();
3804 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3805 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3806 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3807 }
3808
3809 SDValue Res =
3810 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3811 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3812 Subtarget.getXLenVT()),
3813 VL);
3814
3815 if (!DstVT.isFixedLengthVector())
3816 return Res;
3817
3818 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3819}
3820
3821static SDValue
3823 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3824 SDValue Offset, SDValue Mask, SDValue VL,
3826 if (Passthru.isUndef())
3828 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3829 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3830 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3831}
3832
3833static SDValue
3834getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3835 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3836 SDValue VL,
3838 if (Passthru.isUndef())
3840 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3841 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3842 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3843}
3844
3848 int64_t Addend;
3849};
3850
3851static std::optional<APInt> getExactInteger(const APFloat &APF,
3853 // We will use a SINT_TO_FP to materialize this constant so we should use a
3854 // signed APSInt here.
3855 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3856 // We use an arbitrary rounding mode here. If a floating-point is an exact
3857 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3858 // the rounding mode changes the output value, then it is not an exact
3859 // integer.
3861 bool IsExact;
3862 // If it is out of signed integer range, it will return an invalid operation.
3863 // If it is not an exact integer, IsExact is false.
3864 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3866 !IsExact)
3867 return std::nullopt;
3868 return ValInt.extractBits(BitWidth, 0);
3869}
3870
3871// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3872// to the (non-zero) step S and start value X. This can be then lowered as the
3873// RVV sequence (VID * S) + X, for example.
3874// The step S is represented as an integer numerator divided by a positive
3875// denominator. Note that the implementation currently only identifies
3876// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3877// cannot detect 2/3, for example.
3878// Note that this method will also match potentially unappealing index
3879// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3880// determine whether this is worth generating code for.
3881//
3882// EltSizeInBits is the size of the type that the sequence will be calculated
3883// in, i.e. SEW for build_vectors or XLEN for address calculations.
3884static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3885 unsigned EltSizeInBits) {
3886 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3888 return std::nullopt;
3889 bool IsInteger = Op.getValueType().isInteger();
3890
3891 std::optional<unsigned> SeqStepDenom;
3892 std::optional<APInt> SeqStepNum;
3893 std::optional<APInt> SeqAddend;
3894 std::optional<std::pair<APInt, unsigned>> PrevElt;
3895 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3896
3897 // First extract the ops into a list of constant integer values. This may not
3898 // be possible for floats if they're not all representable as integers.
3899 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3900 const unsigned OpSize = Op.getScalarValueSizeInBits();
3901 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3902 if (Elt.isUndef()) {
3903 Elts[Idx] = std::nullopt;
3904 continue;
3905 }
3906 if (IsInteger) {
3907 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3908 } else {
3909 auto ExactInteger =
3910 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3911 if (!ExactInteger)
3912 return std::nullopt;
3913 Elts[Idx] = *ExactInteger;
3914 }
3915 }
3916
3917 for (auto [Idx, Elt] : enumerate(Elts)) {
3918 // Assume undef elements match the sequence; we just have to be careful
3919 // when interpolating across them.
3920 if (!Elt)
3921 continue;
3922
3923 if (PrevElt) {
3924 // Calculate the step since the last non-undef element, and ensure
3925 // it's consistent across the entire sequence.
3926 unsigned IdxDiff = Idx - PrevElt->second;
3927 APInt ValDiff = *Elt - PrevElt->first;
3928
3929 // A zero-value value difference means that we're somewhere in the middle
3930 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3931 // step change before evaluating the sequence.
3932 if (ValDiff == 0)
3933 continue;
3934
3935 int64_t Remainder = ValDiff.srem(IdxDiff);
3936 // Normalize the step if it's greater than 1.
3937 if (Remainder != ValDiff.getSExtValue()) {
3938 // The difference must cleanly divide the element span.
3939 if (Remainder != 0)
3940 return std::nullopt;
3941 ValDiff = ValDiff.sdiv(IdxDiff);
3942 IdxDiff = 1;
3943 }
3944
3945 if (!SeqStepNum)
3946 SeqStepNum = ValDiff;
3947 else if (ValDiff != SeqStepNum)
3948 return std::nullopt;
3949
3950 if (!SeqStepDenom)
3951 SeqStepDenom = IdxDiff;
3952 else if (IdxDiff != *SeqStepDenom)
3953 return std::nullopt;
3954 }
3955
3956 // Record this non-undef element for later.
3957 if (!PrevElt || PrevElt->first != *Elt)
3958 PrevElt = std::make_pair(*Elt, Idx);
3959 }
3960
3961 // We need to have logged a step for this to count as a legal index sequence.
3962 if (!SeqStepNum || !SeqStepDenom)
3963 return std::nullopt;
3964
3965 // Loop back through the sequence and validate elements we might have skipped
3966 // while waiting for a valid step. While doing this, log any sequence addend.
3967 for (auto [Idx, Elt] : enumerate(Elts)) {
3968 if (!Elt)
3969 continue;
3970 APInt ExpectedVal =
3971 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3972 *SeqStepNum)
3973 .sdiv(*SeqStepDenom);
3974
3975 APInt Addend = *Elt - ExpectedVal;
3976 if (!SeqAddend)
3977 SeqAddend = Addend;
3978 else if (Addend != SeqAddend)
3979 return std::nullopt;
3980 }
3981
3982 assert(SeqAddend && "Must have an addend if we have a step");
3983
3984 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3985 SeqAddend->getSExtValue()};
3986}
3987
3988// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3989// and lower it as a VRGATHER_VX_VL from the source vector.
3990static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3991 SelectionDAG &DAG,
3992 const RISCVSubtarget &Subtarget) {
3993 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3994 return SDValue();
3995 SDValue Src = SplatVal.getOperand(0);
3996 // Don't perform this optimization for i1 vectors, or if the element types are
3997 // different
3998 // FIXME: Support i1 vectors, maybe by promoting to i8?
3999 MVT EltTy = VT.getVectorElementType();
4000 if (EltTy == MVT::i1 ||
4001 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
4002 return SDValue();
4003 MVT SrcVT = Src.getSimpleValueType();
4004 if (EltTy != SrcVT.getVectorElementType())
4005 return SDValue();
4006 SDValue Idx = SplatVal.getOperand(1);
4007 // The index must be a legal type.
4008 if (Idx.getValueType() != Subtarget.getXLenVT())
4009 return SDValue();
4010
4011 // Check that we know Idx lies within VT
4012 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
4013 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
4014 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
4015 return SDValue();
4016 }
4017
4018 // Convert fixed length vectors to scalable
4019 MVT ContainerVT = VT;
4020 if (VT.isFixedLengthVector())
4021 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4022
4023 MVT SrcContainerVT = SrcVT;
4024 if (SrcVT.isFixedLengthVector()) {
4025 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4026 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4027 }
4028
4029 // Put Vec in a VT sized vector
4030 if (SrcContainerVT.getVectorMinNumElements() <
4031 ContainerVT.getVectorMinNumElements())
4032 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
4033 else
4034 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
4035
4036 // We checked that Idx fits inside VT earlier
4037 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4038 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
4039 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
4040 if (VT.isFixedLengthVector())
4041 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
4042 return Gather;
4043}
4044
4046 const RISCVSubtarget &Subtarget) {
4047 MVT VT = Op.getSimpleValueType();
4048 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4049
4050 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4051
4052 SDLoc DL(Op);
4053 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4054
4055 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
4056 int64_t StepNumerator = SimpleVID->StepNumerator;
4057 unsigned StepDenominator = SimpleVID->StepDenominator;
4058 int64_t Addend = SimpleVID->Addend;
4059
4060 assert(StepNumerator != 0 && "Invalid step");
4061 bool Negate = false;
4062 int64_t SplatStepVal = StepNumerator;
4063 unsigned StepOpcode = ISD::MUL;
4064 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
4065 // anyway as the shift of 63 won't fit in uimm5.
4066 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
4067 isPowerOf2_64(std::abs(StepNumerator))) {
4068 Negate = StepNumerator < 0;
4069 StepOpcode = ISD::SHL;
4070 SplatStepVal = Log2_64(std::abs(StepNumerator));
4071 }
4072
4073 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
4074 // since it's the immediate value many RVV instructions accept. There is
4075 // no vmul.vi instruction so ensure multiply constant can fit in a
4076 // single addi instruction. For the addend, we allow up to 32 bits..
4077 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
4078 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
4079 isPowerOf2_32(StepDenominator) &&
4080 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
4081 MVT VIDVT =
4083 MVT VIDContainerVT =
4084 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
4085 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
4086 // Convert right out of the scalable type so we can use standard ISD
4087 // nodes for the rest of the computation. If we used scalable types with
4088 // these, we'd lose the fixed-length vector info and generate worse
4089 // vsetvli code.
4090 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
4091 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
4092 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
4093 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
4094 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
4095 }
4096 if (StepDenominator != 1) {
4097 SDValue SplatStep =
4098 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
4099 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
4100 }
4101 if (Addend != 0 || Negate) {
4102 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
4103 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
4104 VID);
4105 }
4106 if (VT.isFloatingPoint()) {
4107 // TODO: Use vfwcvt to reduce register pressure.
4108 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
4109 }
4110 return VID;
4111 }
4112 }
4113
4114 return SDValue();
4115}
4116
4117/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
4118/// which constitute a large proportion of the elements. In such cases we can
4119/// splat a vector with the dominant element and make up the shortfall with
4120/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
4121/// Note that this includes vectors of 2 elements by association. The
4122/// upper-most element is the "dominant" one, allowing us to use a splat to
4123/// "insert" the upper element, and an insert of the lower element at position
4124/// 0, which improves codegen.
4126 const RISCVSubtarget &Subtarget) {
4127 MVT VT = Op.getSimpleValueType();
4128 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4129
4130 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4131
4132 SDLoc DL(Op);
4133 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4134
4135 MVT XLenVT = Subtarget.getXLenVT();
4136 unsigned NumElts = Op.getNumOperands();
4137
4138 SDValue DominantValue;
4139 unsigned MostCommonCount = 0;
4140 DenseMap<SDValue, unsigned> ValueCounts;
4141 unsigned NumUndefElts =
4142 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4143
4144 // Track the number of scalar loads we know we'd be inserting, estimated as
4145 // any non-zero floating-point constant. Other kinds of element are either
4146 // already in registers or are materialized on demand. The threshold at which
4147 // a vector load is more desirable than several scalar materializion and
4148 // vector-insertion instructions is not known.
4149 unsigned NumScalarLoads = 0;
4150
4151 for (SDValue V : Op->op_values()) {
4152 if (V.isUndef())
4153 continue;
4154
4155 unsigned &Count = ValueCounts[V];
4156 if (0 == Count)
4157 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
4158 NumScalarLoads += !CFP->isExactlyValue(+0.0);
4159
4160 // Is this value dominant? In case of a tie, prefer the highest element as
4161 // it's cheaper to insert near the beginning of a vector than it is at the
4162 // end.
4163 if (++Count >= MostCommonCount) {
4164 DominantValue = V;
4165 MostCommonCount = Count;
4166 }
4167 }
4168
4169 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
4170 unsigned NumDefElts = NumElts - NumUndefElts;
4171 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
4172
4173 // Don't perform this optimization when optimizing for size, since
4174 // materializing elements and inserting them tends to cause code bloat.
4175 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
4176 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
4177 ((MostCommonCount > DominantValueCountThreshold) ||
4178 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
4179 // Start by splatting the most common element.
4180 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
4181
4182 DenseSet<SDValue> Processed{DominantValue};
4183
4184 // We can handle an insert into the last element (of a splat) via
4185 // v(f)slide1down. This is slightly better than the vslideup insert
4186 // lowering as it avoids the need for a vector group temporary. It
4187 // is also better than using vmerge.vx as it avoids the need to
4188 // materialize the mask in a vector register.
4189 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
4190 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
4191 LastOp != DominantValue) {
4192 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4193 auto OpCode =
4194 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4195 if (!VT.isFloatingPoint())
4196 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
4197 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4198 LastOp, Mask, VL);
4199 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
4200 Processed.insert(LastOp);
4201 }
4202
4203 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
4204 for (const auto &OpIdx : enumerate(Op->ops())) {
4205 const SDValue &V = OpIdx.value();
4206 if (V.isUndef() || !Processed.insert(V).second)
4207 continue;
4208 if (ValueCounts[V] == 1) {
4209 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
4210 } else {
4211 // Blend in all instances of this value using a VSELECT, using a
4212 // mask where each bit signals whether that element is the one
4213 // we're after.
4215 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
4216 return DAG.getConstant(V == V1, DL, XLenVT);
4217 });
4218 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
4219 DAG.getBuildVector(SelMaskTy, DL, Ops),
4220 DAG.getSplatBuildVector(VT, DL, V), Vec);
4221 }
4222 }
4223
4224 return Vec;
4225 }
4226
4227 return SDValue();
4228}
4229
// Lower a BUILD_VECTOR whose operands are all constants to RVV-friendly
// sequences: vmclr/vmset or bit-packed integer chunks for i1 masks,
// vmv.v.x/vfmv.v.f splats, vid-based index sequences, a single scalar insert
// for very small vectors, "hidden" splats at a wider element type, and
// sign-extension from an equivalent <N x i8> build_vector. Returns SDValue()
// to fall back to generic constant-pool lowering.
// NOTE(review): the first line of this signature (and a handful of
// continuation lines below) are missing from this extraction; the code lines
// themselves are unchanged.
4231                                  const RISCVSubtarget &Subtarget) {
4232  MVT VT = Op.getSimpleValueType();
4233  assert(VT.isFixedLengthVector() && "Unexpected vector!");
4234
4235  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4236
4237  SDLoc DL(Op);
4238  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4239
4240  MVT XLenVT = Subtarget.getXLenVT();
4241  unsigned NumElts = Op.getNumOperands();
4242
4243  if (VT.getVectorElementType() == MVT::i1) {
      // All-zero / all-one masks map directly to single vmclr/vmset ops.
4244    if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4245      SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4246      return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4247    }
4248
4249    if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4250      SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4251      return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4252    }
4253
4254    // Lower constant mask BUILD_VECTORs via an integer vector type, in
4255    // scalar integer chunks whose bit-width depends on the number of mask
4256    // bits and XLEN.
4257    // First, determine the most appropriate scalar integer type to use. This
4258    // is at most XLenVT, but may be shrunk to a smaller vector element type
4259    // according to the size of the final vector - use i8 chunks rather than
4260    // XLenVT if we're producing a v8i1. This results in more consistent
4261    // codegen across RV32 and RV64.
4262    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4263    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4264    // If we have to use more than one INSERT_VECTOR_ELT then this
4265    // optimization is likely to increase code size; avoid performing it in
4266    // such a case. We can use a load from a constant pool in this case.
4267    if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4268      return SDValue();
4269    // Now we can create our integer vector type. Note that it may be larger
4270    // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4271    unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4272    MVT IntegerViaVecVT =
4273      MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4274                       IntegerViaVecElts);
4275
4276    uint64_t Bits = 0;
4277    unsigned BitPos = 0, IntegerEltIdx = 0;
4278    SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4279
      // Pack the i1 constants LSB-first into scalar chunks of
      // NumViaIntegerBits each; undef mask bits are treated as 0.
4280    for (unsigned I = 0; I < NumElts;) {
4281      SDValue V = Op.getOperand(I);
4282      bool BitValue = !V.isUndef() && V->getAsZExtVal();
4283      Bits |= ((uint64_t)BitValue << BitPos);
4284      ++BitPos;
4285      ++I;
4286
4287      // Once we accumulate enough bits to fill our scalar type or process the
4288      // last element, insert into our vector and clear our accumulated data.
4289      if (I % NumViaIntegerBits == 0 || I == NumElts) {
4290        if (NumViaIntegerBits <= 32)
4291          Bits = SignExtend64<32>(Bits);
4292        SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4293        Elts[IntegerEltIdx] = Elt;
4294        Bits = 0;
4295        BitPos = 0;
4296        IntegerEltIdx++;
4297      }
4298    }
4299
4300    SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4301
4302    if (NumElts < NumViaIntegerBits) {
4303      // If we're producing a smaller vector than our minimum legal integer
4304      // type, bitcast to the equivalent (known-legal) mask type, and extract
4305      // our final mask.
4306      assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4307      Vec = DAG.getBitcast(MVT::v8i1, Vec);
4308      Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4309    } else {
4310      // Else we must have produced an integer type with the same size as the
4311      // mask type; bitcast for the final result.
4312      assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4313      Vec = DAG.getBitcast(VT, Vec);
4314    }
4315
4316    return Vec;
4317  }
4318
  // NOTE(review): a line is missing here in this extraction — presumably the
  // `if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue())` guard
  // opening this splat-lowering branch; confirm against upstream.
4320    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4321                                        : RISCVISD::VMV_V_X_VL;
4322    if (!VT.isFloatingPoint())
4323      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4324    Splat =
4325        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4326    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4327  }
4328
4329  // Try and match index sequences, which we can lower to the vid instruction
4330  // with optional modifications. An all-undef vector is matched by
4331  // getSplatValue, above.
4332  if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4333    return Res;
4334
4335  // For very small build_vectors, use a single scalar insert of a constant.
4336  // TODO: Base this on constant rematerialization cost, not size.
4337  const unsigned EltBitSize = VT.getScalarSizeInBits();
4338  if (VT.getSizeInBits() <= 32 &&
  // NOTE(review): a continuation line of this condition is missing in this
  // extraction.
4340    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4341    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4342           "Unexpected sequence type");
4343    // If we can use the original VL with the modified element type, this
4344    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4345    // be moved into InsertVSETVLI?
4346    unsigned ViaVecLen =
4347        (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4348    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4349
4350    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4351    uint64_t SplatValue = 0;
4352    // Construct the amalgamated value at this larger vector type.
4353    for (const auto &OpIdx : enumerate(Op->op_values())) {
4354      const auto &SeqV = OpIdx.value();
4355      if (!SeqV.isUndef())
4356        SplatValue |=
4357            ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4358    }
4359
4360    // On RV64, sign-extend from 32 to 64 bits where possible in order to
4361    // achieve better constant materializion.
4362    // On RV32, we need to sign-extend to use getSignedConstant.
4363    if (ViaIntVT == MVT::i32)
4364      SplatValue = SignExtend64<32>(SplatValue);
4365
4366    SDValue Vec = DAG.getInsertVectorElt(
4367        DL, DAG.getUNDEF(ViaVecVT),
4368        DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4369    if (ViaVecLen != 1)
4370      Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4371    return DAG.getBitcast(VT, Vec);
4372  }
4373
4374
4375  // Attempt to detect "hidden" splats, which only reveal themselves as splats
4376  // when re-interpreted as a vector with a larger element type. For example,
4377  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4378  // could be instead splat as
4379  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4380  // TODO: This optimization could also work on non-constant splats, but it
4381  // would require bit-manipulation instructions to construct the splat value.
4382  SmallVector<SDValue> Sequence;
4383  const auto *BV = cast<BuildVectorSDNode>(Op);
4384  if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
  // NOTE(review): a continuation line of this condition is missing in this
  // extraction.
4386      BV->getRepeatedSequence(Sequence) &&
4387      (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4388    unsigned SeqLen = Sequence.size();
4389    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4390    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4391            ViaIntVT == MVT::i64) &&
4392           "Unexpected sequence type");
4393
4394    // If we can use the original VL with the modified element type, this
4395    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4396    // be moved into InsertVSETVLI?
4397    const unsigned RequiredVL = NumElts / SeqLen;
4398    const unsigned ViaVecLen =
4399      (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4400        NumElts : RequiredVL;
4401    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4402
4403    unsigned EltIdx = 0;
4404    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4405    uint64_t SplatValue = 0;
4406    // Construct the amalgamated value which can be splatted as this larger
4407    // vector type.
4408    for (const auto &SeqV : Sequence) {
4409      if (!SeqV.isUndef())
4410        SplatValue |=
4411            ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4412      EltIdx++;
4413    }
4414
4415    // On RV64, sign-extend from 32 to 64 bits where possible in order to
4416    // achieve better constant materializion.
4417    // On RV32, we need to sign-extend to use getSignedConstant.
4418    if (ViaIntVT == MVT::i32)
4419      SplatValue = SignExtend64<32>(SplatValue);
4420
4421    // Since we can't introduce illegal i64 types at this stage, we can only
4422    // perform an i64 splat on RV32 if it is its own sign-extended value. That
4423    // way we can use RVV instructions to splat.
4424    assert((ViaIntVT.bitsLE(XLenVT) ||
4425            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4426           "Unexpected bitcast sequence");
4427    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4428      SDValue ViaVL =
4429          DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4430      MVT ViaContainerVT =
4431          getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4432      SDValue Splat =
4433          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4434                      DAG.getUNDEF(ViaContainerVT),
4435                      DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4436      Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4437      if (ViaVecLen != RequiredVL)
  // NOTE(review): a line is missing here — presumably the start of an
  // extract-subvector call trimming Splat back to RequiredVL elements.
4439            DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4440      return DAG.getBitcast(VT, Splat);
4441    }
4442  }
4443
4444  // If the number of signbits allows, see if we can lower as a <N x i8>.
4445  // Our main goal here is to reduce LMUL (and thus work) required to
4446  // build the constant, but we will also narrow if the resulting
4447  // narrow vector is known to materialize cheaply.
4448  // TODO: We really should be costing the smaller vector. There are
4449  // profitable cases this misses.
4450  if (EltBitSize > 8 && VT.isInteger() &&
4451      (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4452      DAG.ComputeMaxSignificantBits(Op) <= 8) {
4453    SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4454                                        DL, Op->ops());
4455    Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4456                                     Source, DAG, Subtarget);
4457    SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4458    return convertFromScalableVector(VT, Res, DAG, Subtarget);
4459  }
4460
4461  if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4462    return Res;
4463
4464  // For constant vectors, use generic constant pool lowering. Otherwise,
4465  // we'd have to materialize constants in GPRs just to move them into the
4466  // vector.
4467  return SDValue();
4468}
4469
4470static unsigned getPACKOpcode(unsigned DestBW,
4471 const RISCVSubtarget &Subtarget) {
4472 switch (DestBW) {
4473 default:
4474 llvm_unreachable("Unsupported pack size");
4475 case 16:
4476 return RISCV::PACKH;
4477 case 32:
4478 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4479 case 64:
4480 assert(Subtarget.is64Bit());
4481 return RISCV::PACK;
4482 }
4483}
4484
/// Double the element size of the build vector to reduce the number
/// of vslide1down in the build vector chain. In the worst case, this
/// trades three scalar operations for 1 vector operation. Scalar
/// operations are generally lower latency, and for out-of-order cores
/// we also benefit from additional parallelism.
// Returns SDValue() when packing does not apply (non-integer elements,
// missing Zbb/Zba, elements already at least min(ELEN, XLEN) wide, or an
// odd element count).
// NOTE(review): the first line of this signature (and one continuation line
// in the OR fallback below) are missing from this extraction; the code lines
// themselves are unchanged.
4491                                        const RISCVSubtarget &Subtarget) {
4492  SDLoc DL(Op);
4493  MVT VT = Op.getSimpleValueType();
4494  assert(VT.isFixedLengthVector() && "Unexpected vector!");
4495  MVT ElemVT = VT.getVectorElementType();
4496  if (!ElemVT.isInteger())
4497    return SDValue();
4498
4499  // TODO: Relax these architectural restrictions, possibly with costing
4500  // of the actual instructions required.
4501  if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4502    return SDValue();
4503
4504  unsigned NumElts = VT.getVectorNumElements();
4505  unsigned ElemSizeInBits = ElemVT.getSizeInBits();
  // The doubled element must still fit in both a vector element and a GPR.
4506  if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4507      NumElts % 2 != 0)
4508    return SDValue();
4509
4510  // Produce [B,A] packed into a type twice as wide. Note that all
4511  // scalars are XLenVT, possibly masked (see below).
4512  MVT XLenVT = Subtarget.getXLenVT();
4513  SDValue Mask = DAG.getConstant(
4514      APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4515  auto pack = [&](SDValue A, SDValue B) {
4516    // Bias the scheduling of the inserted operations to near the
4517    // definition of the element - this tends to reduce register
4518    // pressure overall.
4519    SDLoc ElemDL(B);
4520    if (Subtarget.hasStdExtZbkb())
4521      // Note that we're relying on the high bits of the result being
4522      // don't care. For PACKW, the result is *sign* extended.
4523      return SDValue(
4524          DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4525                             ElemDL, XLenVT, A, B),
4526          0);
4527
    // No Zbkb: emit (A & mask) | ((B & mask) << ElemSizeInBits) manually.
4528    A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4529    B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4530    SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4531    return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4532                       DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
  // NOTE(review): a trailing argument line of this getNode call is missing in
  // this extraction (presumably disjoint-OR flags).
4534  };
4535
4536  SmallVector<SDValue> NewOperands;
4537  NewOperands.reserve(NumElts / 2);
4538  for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4539    NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4540  assert(NumElts == NewOperands.size() * 2);
4541  MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4542  MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4543  return DAG.getNode(ISD::BITCAST, DL, VT,
4544                     DAG.getBuildVector(WideVecVT, DL, NewOperands));
}
4546
// Main entry for lowering a fixed-length BUILD_VECTOR on RISC-V. Handles, in
// order: the RV32 P-extension v4i8 special case, f16/bf16 via integer
// build_vectors, constant operands (delegated), i1 masks via i8 SETCC,
// splats (gather / vmv.s.x / vmv.v.x), dominant-value blending, per-register
// splitting at exact VLEN, element packing, half splitting, and finally a
// budgeted vslide1down/vslide1up chain.
// NOTE(review): the first line of this signature and a few condition/guard
// lines below are missing from this extraction; the code lines themselves
// are unchanged.
4548                                 const RISCVSubtarget &Subtarget) {
4549  MVT VT = Op.getSimpleValueType();
4550  assert(VT.isFixedLengthVector() && "Unexpected vector!");
4551
4552  MVT EltVT = VT.getVectorElementType();
4553  MVT XLenVT = Subtarget.getXLenVT();
4554
4555  SDLoc DL(Op);
4556
4557  if (Subtarget.isRV32() && Subtarget.enablePExtSIMDCodeGen()) {
4558    if (VT != MVT::v4i8)
4559      return SDValue();
4560
4561    // <4 x i8> BUILD_VECTOR a, b, c, d -> PACK(PPACK.DH pair(a, b), pair(c, d))
4562    SDValue Val0 =
4563        DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(0));
4564    SDValue Val1 =
4565        DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(1));
4566    SDValue Val2 =
4567        DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(2));
4568    SDValue Val3 =
4569        DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(3));
4570    SDValue PackDH =
4571        DAG.getNode(RISCVISD::PPACK_DH, DL, {MVT::v2i16, MVT::v2i16},
4572                    {Val0, Val1, Val2, Val3});
4573
4574    return DAG.getNode(
4575        ISD::BITCAST, DL, MVT::v4i8,
4576        SDValue(
4577            DAG.getMachineNode(
4578                RISCV::PACK, DL, MVT::i32,
4579                {DAG.getNode(ISD::BITCAST, DL, MVT::i32, PackDH.getValue(0)),
4580                 DAG.getNode(ISD::BITCAST, DL, MVT::i32, PackDH.getValue(1))}),
4581            0));
4582  }
4583
4584  // Proper support for f16 requires Zvfh. bf16 always requires special
4585  // handling. We need to cast the scalar to integer and create an integer
4586  // build_vector.
4587  if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4588      (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
4589    MVT IVT = VT.changeVectorElementType(MVT::i16);
4590    SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4591    for (const auto &[I, U] : enumerate(Op->ops())) {
4592      SDValue Elem = U.get();
4593      if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4594          (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4595        // Called by LegalizeDAG, we need to use XLenVT operations since we
4596        // can't create illegal types.
4597        if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4598          // Manually constant fold so the integer build_vector can be lowered
4599          // better. Waiting for DAGCombine will be too late.
4600          APInt V =
4601              C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4602          NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4603        } else {
4604          NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4605        }
4606      } else {
4607        // Called by scalar type legalizer, we can use i16.
4608        NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4609      }
4610    }
4611    SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4612    return DAG.getBitcast(VT, Res);
4613  }
4614
4615  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
  // NOTE(review): a continuation line of this condition (presumably the
  // constant-FP build_vector check) is missing in this extraction.
4617    return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4618
4619  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4620
4621  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4622
4623  if (VT.getVectorElementType() == MVT::i1) {
4624    // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4625    // vector type, we have a legal equivalently-sized i8 type, so we can use
4626    // that.
4627    MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4628    SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4629
4630    SDValue WideVec;
    // NOTE(review): a guard line is missing here — presumably
    // `if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {`.
4632      // For a splat, perform a scalar truncate before creating the wider
4633      // vector.
4634      Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4635                          DAG.getConstant(1, DL, Splat.getValueType()));
4636      WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4637    } else {
4638      SmallVector<SDValue, 8> Ops(Op->op_values());
4639      WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4640      SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4641      WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4642    }
4643
4644    return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4645  }
4646
  // NOTE(review): a guard line is missing here — presumably
  // `if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {`.
4648    if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4649      return Gather;
4650
4651    // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4652    // pressure at high LMUL.
4653    if (all_of(Op->ops().drop_front(),
4654               [](const SDUse &U) { return U.get().isUndef(); })) {
4655      unsigned Opc =
4656          VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4657      if (!VT.isFloatingPoint())
4658        Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4659      Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4660                          Splat, VL);
4661      return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4662    }
4663
4664    unsigned Opc =
4665        VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4666    if (!VT.isFloatingPoint())
4667      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4668    Splat =
4669        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4670    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4671  }
4672
4673  if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4674    return Res;
4675
4676  // If we're compiling for an exact VLEN value, we can split our work per
4677  // register in the register group.
4678  if (const auto VLen = Subtarget.getRealVLen();
4679      VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4680    MVT ElemVT = VT.getVectorElementType();
4681    unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4682    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4683    MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4684    MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4685    assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4686
4687    // The following semantically builds up a fixed length concat_vector
4688    // of the component build_vectors. We eagerly lower to scalable and
4689    // insert_subvector here to avoid DAG combining it back to a large
4690    // build_vector.
4691    SmallVector<SDValue> BuildVectorOps(Op->ops());
4692    unsigned NumOpElts = M1VT.getVectorMinNumElements();
4693    SDValue Vec = DAG.getUNDEF(ContainerVT);
4694    for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4695      auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4696      SDValue SubBV =
4697          DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4698      SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4699      unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4700      Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4701    }
4702    return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4703  }
4704
4705  // If we're about to resort to vslide1down (or stack usage), pack our
4706  // elements into the widest scalar type we can. This will force a VL/VTYPE
4707  // toggle, but reduces the critical path, the number of vslide1down ops
4708  // required, and possibly enables scalar folds of the values.
4709  if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4710    return Res;
4711
4712  // For m1 vectors, if we have non-undef values in both halves of our vector,
4713  // split the vector into low and high halves, build them separately, then
4714  // use a vselect to combine them. For long vectors, this cuts the critical
4715  // path of the vslide1down sequence in half, and gives us an opportunity
4716  // to special case each half independently. Note that we don't change the
4717  // length of the sub-vectors here, so if both fallback to the generic
4718  // vslide1down path, we should be able to fold the vselect into the final
4719  // vslidedown (for the undef tail) for the first half w/ masking.
4720  unsigned NumElts = VT.getVectorNumElements();
4721  unsigned NumUndefElts =
4722      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4723  unsigned NumDefElts = NumElts - NumUndefElts;
4724  if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4725      ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4726    SmallVector<SDValue> SubVecAOps, SubVecBOps;
4727    SmallVector<SDValue> MaskVals;
4728    SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4729    SubVecAOps.reserve(NumElts);
4730    SubVecBOps.reserve(NumElts);
4731    for (const auto &[Idx, U] : enumerate(Op->ops())) {
4732      SDValue Elem = U.get();
4733      if (Idx < NumElts / 2) {
4734        SubVecAOps.push_back(Elem);
4735        SubVecBOps.push_back(UndefElem);
4736      } else {
4737        SubVecAOps.push_back(UndefElem);
4738        SubVecBOps.push_back(Elem);
4739      }
4740      bool SelectMaskVal = (Idx < NumElts / 2);
4741      MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4742    }
4743    assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4744           MaskVals.size() == NumElts);
4745
4746    SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4747    SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4748    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4749    SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4750    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4751  }
4752
4753  // Cap the cost at a value linear to the number of elements in the vector.
4754  // The default lowering is to use the stack. The vector store + scalar loads
4755  // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4756  // being (at least) linear in LMUL. As a result, using the vslidedown
4757  // lowering for every element ends up being VL*LMUL..
4758  // TODO: Should we be directly costing the stack alternative? Doing so might
4759  // give us a more accurate upper bound.
4760  InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4761
4762  // TODO: unify with TTI getSlideCost.
4763  InstructionCost PerSlideCost = 1;
4764  switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4765  default: break;
4766  case RISCVVType::LMUL_2:
4767    PerSlideCost = 2;
4768    break;
4769  case RISCVVType::LMUL_4:
4770    PerSlideCost = 4;
4771    break;
4772  case RISCVVType::LMUL_8:
4773    PerSlideCost = 8;
4774    break;
4775  }
4776
4777  // TODO: Should we be using the build instseq then cost + evaluate scheme
4778  // we use for integer constants here?
  // Charge one slide per defined element plus one vslidedown per run of
  // undefs; bail out to the stack lowering if that exceeds the budget.
4779  unsigned UndefCount = 0;
4780  for (const SDValue &V : Op->ops()) {
4781    if (V.isUndef()) {
4782      UndefCount++;
4783      continue;
4784    }
4785    if (UndefCount) {
4786      LinearBudget -= PerSlideCost;
4787      UndefCount = 0;
4788    }
4789    LinearBudget -= PerSlideCost;
4790  }
4791  if (UndefCount) {
4792    LinearBudget -= PerSlideCost;
4793  }
4794
4795  if (LinearBudget < 0)
4796    return SDValue();
4797
4798  assert((!VT.isFloatingPoint() ||
4799          VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4800         "Illegal type which will result in reserved encoding");
4801
4802  const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4803
4804  // General case: splat the first operand and slide other operands down one
4805  // by one to form a vector. Alternatively, if every operand is an
4806  // extraction from element 0 of a vector, we use that vector from the last
4807  // extraction as the start value and slide up instead of slide down. Such that
4808  // (1) we can avoid the initial splat (2) we can turn those vslide1up into
4809  // vslideup of 1 later and eliminate the vector to scalar movement, which is
4810  // something we cannot do with vslide1down/vslidedown.
4811  // Of course, using vslide1up/vslideup might increase the register pressure,
4812  // and that's why we conservatively limit to cases where every operand is an
4813  // extraction from the first element.
4814  SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4815  SDValue EVec;
4816  bool SlideUp = false;
4817  auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4818                       SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4819    if (SlideUp)
4820      return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4821                         Mask, VL, Policy);
4822    return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4823                         Mask, VL, Policy);
4824  };
4825
4826  // The reason we don't use all_of here is because we're also capturing EVec
4827  // from the last non-undef operand. If the std::execution_policy of the
4828  // underlying std::all_of is anything but std::sequenced_policy we might
4829  // capture the wrong EVec.
4830  for (SDValue V : Operands) {
4831    using namespace SDPatternMatch;
4832    SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4833    if (!SlideUp)
4834      break;
4835  }
4836
4837  // Do not slideup if the element type of EVec is different.
4838  if (SlideUp) {
4839    MVT EVecEltVT = EVec.getSimpleValueType().getVectorElementType();
4840    MVT ContainerEltVT = ContainerVT.getVectorElementType();
4841    if (EVecEltVT != ContainerEltVT)
4842      SlideUp = false;
4843  }
4844
4845  if (SlideUp) {
4846    MVT EVecContainerVT = EVec.getSimpleValueType();
4847    // Make sure the original vector has scalable vector type.
4848    if (EVecContainerVT.isFixedLengthVector()) {
4849      EVecContainerVT =
4850          getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4851      EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4852    }
4853
4854    // Adapt EVec's type into ContainerVT.
4855    if (EVecContainerVT.getVectorMinNumElements() <
4856        ContainerVT.getVectorMinNumElements())
4857      EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4858    else
4859      EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4860
4861    // Reverse the elements as we're going to slide up from the last element.
4862    std::reverse(Operands.begin(), Operands.end());
4863  }
4864
4865  SDValue Vec;
4866  UndefCount = 0;
4867  for (SDValue V : Operands) {
4868    if (V.isUndef()) {
4869      UndefCount++;
4870      continue;
4871    }
4872
4873    // Start our sequence with either a TA splat or extract source in the
4874    // hopes that hardware is able to recognize there's no dependency on the
4875    // prior value of our temporary register.
4876    if (!Vec) {
4877      if (SlideUp) {
4878        Vec = EVec;
4879      } else {
4880        Vec = DAG.getSplatVector(VT, DL, V);
4881        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4882      }
4883
4884      UndefCount = 0;
4885      continue;
4886    }
4887
4888    if (UndefCount) {
      // Collapse a run of undefs into one multi-element slide.
4889      const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4890      Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4891                      VL);
4892      UndefCount = 0;
4893    }
4894
4895    unsigned Opcode;
4896    if (VT.isFloatingPoint())
4897      Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4898    else
4899      Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4900
4901    if (!VT.isFloatingPoint())
4902      V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4903    Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4904                      V, Mask, VL);
4905  }
4906  if (UndefCount) {
    // Slide once more for a trailing run of undefs.
4907    const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4908    Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4909                    VL);
4910  }
4911  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4912}
4913
// Splat an i64 value, given as its two i32 halves Lo/Hi, into a vector of VT
// with vector length VL on RV32. Tries several cheap forms before falling
// back to a stack store + strided load:
//  - Hi is just the sign-extension of Lo -> single vmv.v.x of Lo;
//  - Lo == Hi -> vmv.v.x at EEW=32 with doubled VL, then bitcast;
//  - Hi is (sra Lo, 31) or undef -> vmv.v.x of Lo.
// NOTE(review): one parameter-list line and the guard line for the
// constant-halves case are missing from this extraction; the code lines
// themselves are unchanged.
4914static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4916                                   SelectionDAG &DAG) {
4917  if (!Passthru)
4918    Passthru = DAG.getUNDEF(VT);
  // NOTE(review): a guard line is missing here — presumably
  // `if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {`.
4920    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4921    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4922    // If Hi constant is all the same sign bit as Lo, lower this as a custom
4923    // node in order to try and match RVV vector/scalar instructions.
4924    if ((LoC >> 31) == HiC)
4925      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4926
4927    // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4928    // VL. This can temporarily increase VL if VL less than VLMAX.
4929    if (LoC == HiC) {
4930      SDValue NewVL;
      // 2*VL must still fit a vsetivli immediate; otherwise use VLMAX (x0).
4931      if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4932        NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4933      else
4934        NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4935      MVT InterVT =
4936          MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4937      auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4938                                  DAG.getUNDEF(InterVT), Lo, NewVL);
4939      return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4940    }
4941  }
4942
4943  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4944  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4945      isa<ConstantSDNode>(Hi.getOperand(1)) &&
4946      Hi.getConstantOperandVal(1) == 31)
4947    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4948
4949  // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4950  // even if it might be sign extended.
4951  if (Hi.isUndef())
4952    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4953
4954  // Fall back to a stack store and stride x0 vector load.
4955  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4956                     Hi, VL);
4957}
4958
4959// Called by type legalization to handle splat of i64 on RV32.
4960// FIXME: We can optimize this when the type has sign or zero bits in one
4961// of the halves.
4962static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4963 SDValue Scalar, SDValue VL,
4964 SelectionDAG &DAG) {
4965 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4966 SDValue Lo, Hi;
4967 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4968 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4969}
4970
// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
// Handles: f16/bf16 via an integer-element recursion, scalars that fit in
// XLenVT via vmv.v.x (sign-extending constants to enable .vi forms), and the
// RV32 i64 case via splatSplitI64WithVL.
// NOTE(review): the line selecting the extension opcode (presumably
// `isInt<5>(...) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;`) is missing from
// this extraction; the code lines themselves are unchanged.
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
                                MVT VT, const SDLoc &DL, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  bool HasPassthru = Passthru && !Passthru.isUndef();
  if (!HasPassthru && !Passthru)
    Passthru = DAG.getUNDEF(VT);

  MVT EltVT = VT.getVectorElementType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VT.isFloatingPoint()) {
    // f16 without Zvfh / bf16 without vector bf16 support: move the scalar
    // to an integer register and splat as an i16-element vector instead.
    if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
        (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
      if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
          (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
        Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
      else
        Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
      MVT IVT = VT.changeVectorElementType(MVT::i16);
      Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
      SDValue Splat =
          lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
      return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
    }
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
  }

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (Scalar.getValueType().bitsLE(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // a zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
    // NOTE(review): the opcode-selection expression line is missing here in
    // this extraction.
    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
  }

  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
         "Unexpected scalar for splat lowering!");

  // Splatting 0 into lane 0 only needs vmv.s.x when VL is 1.
  if (isOneConstant(VL) && isNullConstant(Scalar))
    return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
                       DAG.getConstant(0, DL, XLenVT), VL);

  // Otherwise use the more complicated splatting algorithm.
  return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
}
5023
5024// This function lowers an insert of a scalar operand Scalar into lane
5025// 0 of the vector regardless of the value of VL. The contents of the
5026// remaining lanes of the result vector are unspecified. VL is assumed
5027// to be non-zero.
                                const SDLoc &DL, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() && "Expect VT is scalable vector type.");

  const MVT XLenVT = Subtarget.getXLenVT();
  SDValue Passthru = DAG.getUNDEF(VT);

  // If the scalar was extracted from lane 0 of another vector with the same
  // element type, reuse that vector directly instead of routing the value
  // through a scalar register.
  if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      isNullConstant(Scalar.getOperand(1))) {
    SDValue ExtractedVal = Scalar.getOperand(0);
    // The element types must be the same.
    if (ExtractedVal.getValueType().getVectorElementType() ==
        VT.getVectorElementType()) {
      MVT ExtractedVT = ExtractedVal.getSimpleValueType();
      MVT ExtractedContainerVT = ExtractedVT;
      if (ExtractedContainerVT.isFixedLengthVector()) {
        ExtractedContainerVT = getContainerForFixedLengthVector(
            DAG, ExtractedContainerVT, Subtarget);
        ExtractedVal = convertToScalableVector(ExtractedContainerVT,
                                               ExtractedVal, DAG, Subtarget);
      }
      // Insert or extract a subvector depending on which type is wider.
      if (ExtractedContainerVT.bitsLE(VT))
        return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
      return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
    }
  }

  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
                       VL);

  // Avoid the tricky legalization cases by falling back to using the
  // splat code which already handles it gracefully.
  if (!Scalar.getValueType().bitsLE(XLenVT))
    return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
                            DAG.getConstant(1, DL, XLenVT),
                            VT, DL, DAG, Subtarget);

  // If the operand is a constant, sign extend to increase our chances
  // of being able to use a .vi instruction. ANY_EXTEND would become a
  // a zero extend and the simm5 check in isel would fail.
  // FIXME: Should we ignore the upper bits in isel instead?
  unsigned ExtOpc =
  // NOTE(review): the initializer of ExtOpc is missing in this copy of the
  // file (truncated line) -- verify against upstream before relying on it.
  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
                     VL);
}
5077
5078/// If concat_vector(V1,V2) could be folded away to some existing
5079/// vector source, return it. Note that the source may be larger
5080/// than the requested concat_vector (i.e. a extract_subvector
5081/// might be required.)
  EVT VT = V1.getValueType();
  assert(VT == V2.getValueType() && "argument types must match");
  // Both input must be extracts.
  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      // NOTE(review): the remainder of this condition (the matching check on
      // V2) is missing in this copy of the file -- verify against upstream.
    return SDValue();

  // Extracting from the same source.
  SDValue Src = V1.getOperand(0);
  // The source must agree with the extracts on scalability.
  if (Src != V2.getOperand(0) ||
      VT.isScalableVector() != Src.getValueType().isScalableVector())
    return SDValue();

  // The extracts must extract the two halves of the source.
  if (V1.getConstantOperandVal(1) != 0 ||
      // NOTE(review): the check on V2's extract index (the upper half) is
      // missing in this copy of the file -- verify against upstream.
    return SDValue();

  return Src;
}
5103
5104// Can this shuffle be performed on exactly one (possibly larger) input?

  // An undef second operand means the first operand already is the single
  // source.
  if (V2.isUndef())
    return V1;

  unsigned NumElts = VT.getVectorNumElements();
  // Src needs to have twice the number of elements.
  // TODO: Update shuffle lowering to add the extract subvector
  if (SDValue Src = foldConcatVector(V1, V2);
      Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
    return Src;

  return SDValue();
}
5119
5120/// Is this shuffle interleaving contiguous elements from one vector into the
5121/// even elements and contiguous elements from another vector into the odd
5122/// elements. \p EvenSrc will contain the element that should be in the first
5123/// even element. \p OddSrc will contain the element that should be in the first
5124/// odd element. These can be the first element in a source or the element half
5125/// way through the source.
5126static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
5127 int &OddSrc, const RISCVSubtarget &Subtarget) {
5128 // We need to be able to widen elements to the next larger integer type or
5129 // use the zip2a instruction at e64.
5130 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
5131 !Subtarget.hasVendorXRivosVizip())
5132 return false;
5133
5134 int Size = Mask.size();
5135 int NumElts = VT.getVectorNumElements();
5136 assert(Size == (int)NumElts && "Unexpected mask size");
5137
5138 SmallVector<unsigned, 2> StartIndexes;
5139 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
5140 return false;
5141
5142 EvenSrc = StartIndexes[0];
5143 OddSrc = StartIndexes[1];
5144
5145 // One source should be low half of first vector.
5146 if (EvenSrc != 0 && OddSrc != 0)
5147 return false;
5148
5149 // Subvectors will be subtracted from either at the start of the two input
5150 // vectors, or at the start and middle of the first vector if it's an unary
5151 // interleave.
5152 // In both cases, HalfNumElts will be extracted.
5153 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
5154 // we'll create an illegal extract_subvector.
5155 // FIXME: We could support other values using a slidedown first.
5156 int HalfNumElts = NumElts / 2;
5157 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
5158}
5159
5160/// Is this mask representing a masked combination of two slides?
                              std::array<std::pair<int, int>, 2> &SrcInfo) {
  // Defer the structural matching to the generic helper; the rest of this
  // function filters and canonicalizes its result.
  if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
    return false;

  // Avoid matching vselect idioms
  if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
    return false;
  // Prefer vslideup as the second instruction, and identity
  // only as the initial instruction.
  if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
      SrcInfo[1].second == 0)
    std::swap(SrcInfo[0], SrcInfo[1]);
  assert(SrcInfo[0].first != -1 && "Must find one slide");
  return true;
}
5177
5178// Exactly matches the semantics of a previously existing custom matcher
5179// to allow migration to new matcher without changing output.
static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
                            unsigned NumElts) {
  // A single slide (no second source) is trivially accepted.
  if (SrcInfo[1].first == -1)
    return true;
  // Otherwise require a slide-down (negative offset) paired with a slide-up
  // (positive offset) whose offsets differ by exactly the vector length.
  const int DownOff = SrcInfo[0].second;
  const int UpOff = SrcInfo[1].second;
  if (DownOff >= 0 || UpOff <= 0)
    return false;
  return UpOff - DownOff == (int)NumElts;
}
5187
5188static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
5189 ArrayRef<int> Mask, unsigned Factor,
5190 bool RequiredPolarity) {
5191 int NumElts = Mask.size();
5192 for (const auto &[Idx, M] : enumerate(Mask)) {
5193 if (M < 0)
5194 continue;
5195 int Src = M >= NumElts;
5196 int Diff = (int)Idx - (M % NumElts);
5197 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
5198 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
5199 "Must match exactly one of the two slides");
5200 if (RequiredPolarity != (C == (Idx / Factor) % 2))
5201 return false;
5202 }
5203 return true;
5204}
5205
5206/// Given a shuffle which can be represented as a pair of two slides,
5207/// see if it is a zipeven idiom. Zipeven is:
5208/// vs2: a0 a1 a2 a3
5209/// vs1: b0 b1 b2 b3
5210/// vd: a0 b0 a2 b2
5211static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
5212 ArrayRef<int> Mask, unsigned &Factor) {
5213 Factor = SrcInfo[1].second;
5214 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5215 Mask.size() % Factor == 0 &&
5216 isAlternating(SrcInfo, Mask, Factor, true);
5217}
5218
5219/// Given a shuffle which can be represented as a pair of two slides,
5220/// see if it is a zipodd idiom. Zipodd is:
5221/// vs2: a0 a1 a2 a3
5222/// vs1: b0 b1 b2 b3
5223/// vd: a1 b1 a3 b3
5224/// Note that the operand order is swapped due to the way we canonicalize
/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
5226static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
5227 ArrayRef<int> Mask, unsigned &Factor) {
5228 Factor = -SrcInfo[1].second;
5229 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5230 Mask.size() % Factor == 0 &&
5231 isAlternating(SrcInfo, Mask, Factor, false);
5232}
5233
5234// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
5235// 2, 4, 8 and the integer type Factor-times larger than VT's
5236// element type must be a legal element type.
5237// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
5238// -> [p, q, r, s] (Factor=2, Index=1)
                                            SDValue Src, unsigned Factor,
                                            unsigned Index, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();
  ElementCount SrcEC = Src.getValueType().getVectorElementCount();
  // View the source as a vector of Factor-times wider integer elements.
  MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
                                   SrcEC.divideCoefficientBy(Factor));
  MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
                               SrcEC.divideCoefficientBy(Factor));
  Src = DAG.getBitcast(WideSrcVT, Src);

  // Shift the requested field (Index) to the bottom of each wide element,
  // then truncate away the remaining fields.
  unsigned Shift = Index * EltBits;
  SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
                            DAG.getConstant(Shift, DL, WideSrcVT));
  Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
  // NOTE(review): the declaration of CastVT is missing in this copy of the
  // file -- verify against upstream.
  Res = DAG.getBitcast(CastVT, Res);
  return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
}
5258
5259/// Match a single source shuffle which is an identity except that some
5260/// particular element is repeated. This can be lowered as a masked
5261/// vrgather.vi/vx. Note that the two source form of this is handled
5262/// by the recursive splitting logic and doesn't need special handling.
                                            const RISCVSubtarget &Subtarget,
                                            SelectionDAG &DAG) {

  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V1 = SVN->getOperand(0);
  assert(SVN->getOperand(1).isUndef());
  ArrayRef<int> Mask = SVN->getMask();
  const unsigned NumElts = VT.getVectorNumElements();
  MVT XLenVT = Subtarget.getXLenVT();

  // Find the single repeated source element: every mask entry that is not
  // undef and not the identity must refer to the same index.
  std::optional<int> SplatIdx;
  for (auto [I, M] : enumerate(Mask)) {
    if (M == -1 || I == (unsigned)M)
      continue;
    if (SplatIdx && *SplatIdx != M)
      return SDValue();
    SplatIdx = M;
  }

  // A pure identity (or all-undef) shuffle is not our pattern.
  if (!SplatIdx)
    return SDValue();

  // Build a select mask which is true exactly where the repeated element is
  // written.
  SmallVector<SDValue> MaskVals;
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = MaskIndex == *SplatIdx;
    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
  }
  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
  // Splat the repeated element, then merge it over the identity vector under
  // the select mask.
  SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
                                       SmallVector<int>(NumElts, *SplatIdx));
  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
}
5299
5300// Lower the following shuffle to vslidedown.
5301// a)
5302// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5303// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5304// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5305// b)
5306// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5307// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5308// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5309// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5310// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5311// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
                                               SDValue V1, SDValue V2,
                                               ArrayRef<int> Mask,
                                               const RISCVSubtarget &Subtarget,
                                               SelectionDAG &DAG) {
  // Walk up through nested EXTRACT_SUBVECTORs, accumulating the total
  // element offset, and return the outermost non-extract source.
  auto findNonEXTRACT_SUBVECTORParent =
      [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
    uint64_t Offset = 0;
    while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
           // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
           // a scalable vector. But we don't want to match the case.
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    }
    return std::make_pair(Parent, Offset);
  };

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  // Extracting from the same source.
  SDValue Src = V1Src;
  if (Src != V2Src)
    return SDValue();

  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
  SmallVector<int, 16> NewMask(Mask);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
      continue;

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
    } else {
      // Minus NewMask.size() is needed. Otherwise, the b case would be
      // <5,6,7,12> instead of <5,6,7,8>.
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
    }
  }

  // First index must be known and non-zero. It will be used as the slidedown
  // amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // NewMask is also continuous.
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  // Emit the slidedown on the scalable container type, then extract the
  // requested fixed-length result from it.
  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getExtractSubvector(
      DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
}
5374
5375// Because vslideup leaves the destination elements at the start intact, we can
5376// use it to perform shuffles that insert subvectors:
5377//
5378// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5379// ->
5380// vsetvli zero, 8, e8, mf2, ta, ma
5381// vslideup.vi v8, v9, 4
5382//
5383// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5384// ->
5385// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const RISCVSubtarget &Subtarget,
                                             SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  // The mask may describe inserting V1 into V2 rather than V2 into V1.
  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy =
  // NOTE(review): the initial policy value is missing in this copy of the
  // file (truncated line) -- verify against upstream.
  // If we're adding a suffix to the in place vector, i.e. inserting right
  // up to the very end of it, then we don't actually care about the tail.
  if (NumSubElts + Index >= (int)NumElts)
    Policy |= RISCVVType::TAIL_AGNOSTIC;

  InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
  ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
  SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);

  SDValue Res;
  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (Index == 0)
    Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
                      VL);
  else
    Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
                      DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
  return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
5429
5430/// Match v(f)slide1up/down idioms. These operations involve sliding
5431/// N-1 elements to make room for an inserted scalar at one end.
                                           SDValue V1, SDValue V2,
                                           ArrayRef<int> Mask,
                                           const RISCVSubtarget &Subtarget,
                                           SelectionDAG &DAG) {
  // Canonicalize so V1 is the build_vector providing the inserted scalar.
  bool OpsSwapped = false;
  if (!isa<BuildVectorSDNode>(V1)) {
    if (!isa<BuildVectorSDNode>(V2))
      return SDValue();
    std::swap(V1, V2);
    OpsSwapped = true;
  }
  SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
  if (!Splat)
    return SDValue();

  // Return true if the mask could describe a slide of Mask.size() - 1
  // elements from concat_vector(V1, V2)[Base:] to [Offset:].
  auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
    const unsigned S = (Offset > 0) ? 0 : -Offset;
    const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
    for (unsigned i = S; i != E; ++i)
      if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
        return false;
    return true;
  };

  const unsigned NumElts = VT.getVectorNumElements();
  bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
  if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
    return SDValue();

  const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // Inserted lane must come from splat, undef scalar is legal but not profitable.
  if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
    return SDValue();

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
  // vslide1{down,up}.vx instead.
  if (VT.getVectorElementType() == MVT::bf16 ||
      (VT.getVectorElementType() == MVT::f16 &&
       !Subtarget.hasVInstructionsF16())) {
    MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
    Splat =
        DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
    V2 = DAG.getBitcast(
        IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
    SDValue Vec = DAG.getNode(
        IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
        IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
    Vec = DAG.getBitcast(ContainerVT, Vec);
    return convertFromScalableVector(VT, Vec, DAG, Subtarget);
  }

  // Pick the FP or integer form of the slide matching the direction.
  auto OpCode = IsVSlidedown ?
    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
  if (!VT.isFloatingPoint())
    Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
  auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT),
                         convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
                         Splat, TrueMask, VL);
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
5500
5501/// Match a mask which "spreads" the leading elements of a vector evenly
5502/// across the result. Factor is the spread amount, and Index is the
5503/// offset applied. (on success, Index < Factor) This is the inverse
5504/// of a deinterleave with the same Factor and Index. This is analogous
5505/// to an interleave, except that all but one lane is undef.
                         unsigned &Index) {
  // Track, for each residue class mod Factor, whether every mask entry at
  // that lane position is undef.
  SmallVector<bool> LaneIsUndef(Factor, true);
  for (unsigned i = 0; i < Mask.size(); i++)
    LaneIsUndef[i % Factor] &= (Mask[i] == -1);

  // Exactly one lane may be defined; its position is the spread Index.
  bool Found = false;
  for (unsigned i = 0; i < Factor; i++) {
    if (LaneIsUndef[i])
      continue;
    if (Found)
      return false;
    Index = i;
    Found = true;
  }
  if (!Found)
    return false;

  // The defined lane must hold the identity sequence 0, 1, 2, ...
  for (unsigned i = 0; i < Mask.size() / Factor; i++) {
    unsigned j = i * Factor + Index;
    if (Mask[j] != -1 && (unsigned)Mask[j] != i)
      return false;
  }
  return true;
}
5531
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
                         const SDLoc &DL, SelectionDAG &DAG,
                         const RISCVSubtarget &Subtarget) {
  assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
         RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
         RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
  // NOTE(review): a line is missing here in this copy of the file -- verify
  // against upstream.

  // Operate on the integer form of the type; FP inputs are bitcast through.
  MVT VT = Op0.getSimpleValueType();
  // NOTE(review): the declaration of IntVT is missing in this copy of the
  // file -- verify against upstream.
  Op0 = DAG.getBitcast(IntVT, Op0);
  Op1 = DAG.getBitcast(IntVT, Op1);

  // Fixed-length vectors are processed in their scalable container type.
  MVT ContainerVT = IntVT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
    Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
  }

  // For a single-source unzip wider than M1, operate on the two halves of
  // Op0 at half the size instead of on the full register group.
  MVT InnerVT = ContainerVT;
  auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
  if (Op1.isUndef() &&
      ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
      (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
    InnerVT = ContainerVT.getHalfNumVectorElementsVT();
    VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
                         Subtarget.getXLenVT());
    Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
    unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
    Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
    Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
  }

  SDValue Passthru = DAG.getUNDEF(InnerVT);
  SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
  // Widen and convert the result back to the caller's original type.
  if (InnerVT.bitsLT(ContainerVT))
    Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
  if (IntVT.isFixedLengthVector())
    Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
  Res = DAG.getBitcast(VT, Res);
  return Res;
}
5575
5576// Given a vector a, b, c, d return a vector Factor times longer
5577// with Factor-1 undef's between elements. Ex:
5578// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5579// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
                                 const SDLoc &DL, SelectionDAG &DAG) {

  MVT VT = V.getSimpleValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // NOTE(review): the declaration of EC (the source element count) is missing
  // in this copy of the file -- verify against upstream.
  V = DAG.getBitcast(VT.changeTypeToInteger(), V);

  MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);

  // Zero-extending places each source element in the low lane of its
  // Factor-wide group; shifting moves it to lane Index.
  SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
  // TODO: On rv32, the constant becomes a splat_vector_parts which does not
  // allow the SHL to fold away if Index is 0.
  if (Index != 0)
    Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
                         DAG.getConstant(EltBits * Index, DL, WideVT));
  // Make sure to use original element type
  // NOTE(review): the first line of the ResultVT declaration is missing in
  // this copy of the file -- verify against upstream.
                            EC.multiplyCoefficientBy(Factor));
  return DAG.getBitcast(ResultVT, Result);
}
5601
5602// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5603// to create an interleaved vector of <[vscale x] n*2 x ty>.
5604// This requires that the size of ty is less than the subtarget's maximum ELEN.
                                      const SDLoc &DL, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {

  // FIXME: Not only does this optimize the code, it fixes some correctness
  // issues because MIR does not have freeze.
  if (EvenV.isUndef())
    return getWideningSpread(OddV, 2, 1, DL, DAG);
  if (OddV.isUndef())
    return getWideningSpread(EvenV, 2, 0, DL, DAG);

  MVT VecVT = EvenV.getSimpleValueType();
  MVT VecContainerVT = VecVT; // <vscale x n x ty>
  // Convert fixed vectors to scalable if needed
  if (VecContainerVT.isFixedLengthVector()) {
    VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
    EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
    OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
  }

  assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());

  // We're working with a vector of the same size as the resulting
  // interleaved vector, but with half the number of elements and
  // twice the SEW (Hence the restriction on not using the maximum
  // ELEN)
  MVT WideVT =
      // NOTE(review): the first argument of this getVectorVT call (the
      // doubled-width integer element type) is missing in this copy of the
      // file -- verify against upstream.
                   VecVT.getVectorElementCount());
  MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
  if (WideContainerVT.isFixedLengthVector())
    WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);

  // Bitcast the input vectors to integers in case they are FP
  VecContainerVT = VecContainerVT.changeTypeToInteger();
  EvenV = DAG.getBitcast(VecContainerVT, EvenV);
  OddV = DAG.getBitcast(VecContainerVT, OddV);

  auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(WideContainerVT);

  SDValue Interleaved;
  if (Subtarget.hasStdExtZvbb()) {
    // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
    SDValue OffsetVec =
        DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
    Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
                              OffsetVec, Passthru, Mask, VL);
    Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
                              Interleaved, EvenV, Passthru, Mask, VL);
  } else {
    // FIXME: We should freeze the odd vector here. We already handled the case
    // of provably undef/poison above.

    // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
    // vwaddu.vv
    Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
                              OddV, Passthru, Mask, VL);

    // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
    SDValue AllOnesVec = DAG.getSplatVector(
        VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
    SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
                                  OddV, AllOnesVec, Passthru, Mask, VL);

    // Add the two together so we get
    //   (OddV * 0xff...ff) + (OddV + EvenV)
    // = (OddV * 0x100...00) + EvenV
    // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
    Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
                              Interleaved, OddsMul, Passthru, Mask, VL);
  }

  // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
  MVT ResultContainerVT = MVT::getVectorVT(
      VecVT.getVectorElementType(), // Make sure to use original type
      VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);

  // Convert back to a fixed vector if needed
  MVT ResultVT =
      // NOTE(review): the arguments of this declaration are missing in this
      // copy of the file -- verify against upstream.
  if (ResultVT.isFixedLengthVector())
    Interleaved =
        convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);

  return Interleaved;
}
5695
5696// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5697// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
                                       SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V = SVN->getOperand(0);
  unsigned NumElts = VT.getVectorNumElements();

  assert(VT.getVectorElementType() == MVT::i1);

  // NOTE(review): the first line of this condition (the reverse-mask check)
  // is missing in this copy of the file -- verify against upstream.
                                 SVN->getMask().size()) ||
      !SVN->getOperand(1).isUndef())
    return SDValue();

  // Use at least an i8 element; round the bit count up to a power of two.
  unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
  EVT ViaVT = EVT::getVectorVT(
      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
  EVT ViaBitVT =
      EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());

  // If we don't have zvbb or the larger element type > ELEN, the operation will
  // be illegal.
  // NOTE(review): the first line of this condition (the legality query for
  // the wide operation) is missing in this copy of the file -- verify against
  // upstream.
                                                       ViaVT) ||
      !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
    return SDValue();

  // If the bit vector doesn't fit exactly into the larger element type, we need
  // to insert it into the larger vector and then shift up the reversed bits
  // afterwards to get rid of the gap introduced.
  if (ViaEltSize > NumElts)
    V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);

  SDValue Res =
      DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));

  // Shift up the reversed bits if the vector didn't exactly fit into the larger
  // element type.
  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
                      DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));

  Res = DAG.getBitcast(ViaBitVT, Res);

  // Drop the padding lanes introduced above, if any.
  if (ViaEltSize > NumElts)
    Res = DAG.getExtractSubvector(DL, VT, Res, 0);
  return Res;
}
5747
                             const RISCVSubtarget &Subtarget,
                             MVT &RotateVT, unsigned &RotateAmt) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned NumSubElts;
  // Defer the pattern match to the generic helper, restricted to rotates
  // within groups of two sub-elements.
  if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
                                          NumElts, NumSubElts, RotateAmt))
    return false;
  // The rotate operates on groups of NumSubElts elements viewed as a single
  // wider integer element.
  RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
                              NumElts / NumSubElts);

  // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
  return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
}
5763
5764// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5765// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5766// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
                                            SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);

  EVT VT = SVN->getValueType(0);
  unsigned RotateAmt;
  MVT RotateVT;
  if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
    return SDValue();

  // Reinterpret the input with the wider element type and rotate each
  // element instead of shuffling.
  SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));

  SDValue Rotate;
  // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
  // so canonicalize to vrev8.
  if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
    Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
  else
    Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
                         DAG.getConstant(RotateAmt, DL, RotateVT));

  return DAG.getBitcast(VT, Rotate);
}
5791
5792// If compiling with an exactly known VLEN, see if we can split a
5793// shuffle on m2 or larger into a small number of m1 sized shuffles
5794// which write each destination registers exactly once.
 5796 SelectionDAG &DAG,
 5797 const RISCVSubtarget &Subtarget) {
 5798 SDLoc DL(SVN);
 5799 MVT VT = SVN->getSimpleValueType(0);
 5800 SDValue V1 = SVN->getOperand(0);
 5801 SDValue V2 = SVN->getOperand(1);
 5802 ArrayRef<int> Mask = SVN->getMask();
 5803
 5804 // If we don't know exact data layout, not much we can do. If this
 5805 // is already m1 or smaller, no point in splitting further.
 5806 const auto VLen = Subtarget.getRealVLen();
 5807 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
 5808 return SDValue();
 5809
 5810 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
 5811 // expansion for.
 5812 unsigned RotateAmt;
 5813 MVT RotateVT;
 5814 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
 5815 return SDValue();
 5816
 5817 MVT ElemVT = VT.getVectorElementType();
 5818 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
 5819
 5820 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
 5821 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
 5822 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
 5823 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
 5824 unsigned NumOpElts = M1VT.getVectorMinNumElements();
 5825 unsigned NumElts = ContainerVT.getVectorMinNumElements();
 5826 unsigned NumOfSrcRegs = NumElts / NumOpElts;
 5827 unsigned NumOfDestRegs = NumElts / NumOpElts;
 5828 // The following semantically builds up a fixed length concat_vector
 5829 // of the component shuffle_vectors. We eagerly lower to scalable here
 5830 // to avoid DAG combining it back to a large shuffle_vector again.
 5831 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
 5832 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
 // Operands holds, per destination m1 register, the list of
 // (first source reg, second source reg or UINT_MAX if single-source,
 // sub-mask) shuffles needed to produce it; built via the three
 // callbacks below.
 5834 Operands;
 5836 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
 5837 [&]() { Operands.emplace_back(); },
 5838 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
 5839 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
 5840 SmallVector<int>(SrcSubMask));
 5841 },
 5842 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
 5843 if (NewReg)
 5844 Operands.emplace_back();
 5845 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
 5846 });
 5847 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
 5848 // Note: check that we do not emit too many shuffles here to prevent code
 5849 // size explosion.
 5850 // TODO: investigate, if it can be improved by extra analysis of the masks to
 5851 // check if the code is more profitable.
 // Count one shuffle per entry, plus one extra for each two-source entry;
 // single-source identity sub-masks are free (plain register copies) and
 // are credited back.
 5852 unsigned NumShuffles = std::accumulate(
 5853 Operands.begin(), Operands.end(), 0u,
 5854 [&](unsigned N,
 5855 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
 5856 if (Data.empty())
 5857 return N;
 5858 N += Data.size();
 5859 for (const auto &P : Data) {
 5860 unsigned Idx2 = std::get<1>(P);
 5861 ArrayRef<int> Mask = std::get<2>(P);
 5862 if (Idx2 != UINT_MAX)
 5863 ++N;
 5864 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
 5865 --N;
 5866 }
 5867 return N;
 5868 });
 // Profitability cutoff: refuse to emit more m1 shuffles than destination
 // registers (or 4+ shuffles when splitting into at most two registers).
 5869 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
 5870 (NumOfDestRegs <= 2 && NumShuffles >= 4))
 5871 return SDValue();
 // Extract the m1 register starting at element ExtractIdx of a scalable
 // source, returned as a fixed-length OneRegVT value.
 5872 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
 5873 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
 5874 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
 5875 return SubVec;
 5876 };
 // Shuffle two m1-sized fixed vectors (the sub-mask parameter belongs to
 // the lambda's signature).
 5877 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
 5879 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
 5880 return SubVec;
 5881 };
 // Emit the per-destination-register shuffles and insert each result into
 // the final scalable vector.
 5882 SDValue Vec = DAG.getUNDEF(ContainerVT);
 5883 for (auto [I, Data] : enumerate(Operands)) {
 5884 if (Data.empty())
 5885 continue;
 // First pass: extract (and cache in Values) every source register this
 // destination register needs, keyed by source register index.
 5887 for (unsigned I : seq<unsigned>(Data.size())) {
 5888 const auto &[Idx1, Idx2, _] = Data[I];
 5889 // If the shuffle contains permutation of odd number of elements,
 5890 // Idx1 might be used already in the first iteration.
 5891 //
 5892 // Idx1 = shuffle Idx1, Idx2
 5893 // Idx1 = shuffle Idx1, Idx3
 5894 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
 5895 if (!V)
 5896 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
 5897 (Idx1 % NumOfSrcRegs) * NumOpElts);
 5898 if (Idx2 != UINT_MAX) {
 5899 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
 5900 if (!V)
 5901 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
 5902 (Idx2 % NumOfSrcRegs) * NumOpElts);
 5903 }
 5904 }
 // Second pass: chain the shuffles; each result replaces the cached value
 // for Idx1 so a later shuffle in the chain consumes it.
 5905 SDValue V;
 5906 for (const auto &[Idx1, Idx2, Mask] : Data) {
 5907 SDValue V1 = Values.at(Idx1);
 5908 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
 5909 V = PerformShuffle(V1, V2, Mask);
 5910 Values[Idx1] = V;
 5911 }
 5912
 5913 unsigned InsertIdx = I * NumOpElts;
 5914 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
 5915 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
 5916 }
 5917 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
 5918}
5919
5920// Matches a subset of compress masks with a contiguous prefix of output
5921// elements. This could be extended to allow gaps by deciding which
5922// source elements to spuriously demand.
5924 int Last = -1;
5925 bool SawUndef = false;
5926 for (const auto &[Idx, M] : enumerate(Mask)) {
5927 if (M == -1) {
5928 SawUndef = true;
5929 continue;
5930 }
5931 if (SawUndef)
5932 return false;
5933 if (Idx > (unsigned)M)
5934 return false;
5935 if (M <= Last)
5936 return false;
5937 Last = M;
5938 }
5939 return true;
5940}
5941
5942/// Given a shuffle where the indices are disjoint between the two sources,
5943/// e.g.:
5944///
5945/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5946///
5947/// Merge the two sources into one and do a single source shuffle:
5948///
5949/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5950/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5951///
5952/// A vselect will either be merged into a masked instruction or be lowered as a
5953/// vmerge.vvm, which is cheaper than a vrgather.vv.
 5955 SelectionDAG &DAG,
 5956 const RISCVSubtarget &Subtarget) {
 5957 MVT VT = SVN->getSimpleValueType(0);
 5958 MVT XLenVT = Subtarget.getXLenVT();
 5959 SDLoc DL(SVN);
 5960
 5961 const ArrayRef<int> Mask = SVN->getMask();
 5962
 5963 // Work out which source each lane will come from.
 // Srcs[L] is -1 (lane unused), 0 (lane read from operand 0), or
 // 1 (lane read from operand 1). A lane claimed by both sources means the
 // indices are not disjoint and the transform does not apply.
 5964 SmallVector<int, 16> Srcs(Mask.size(), -1);
 5965
 5966 for (int Idx : Mask) {
 5967 if (Idx == -1)
 5968 continue;
 5969 unsigned SrcIdx = Idx % Mask.size();
 5970 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
 5971 if (Srcs[SrcIdx] == -1)
 5972 // Mark this source as using this lane.
 5973 Srcs[SrcIdx] = Src;
 5974 else if (Srcs[SrcIdx] != Src)
 5975 // The other source is using this lane: not disjoint.
 5976 return SDValue();
 5977 }
 5978
 // Build the vselect condition: a 1 picks the lane from operand 0 (the
 // "true" operand of VSELECT), a 0 picks operand 1; unused lanes are undef.
 5979 SmallVector<SDValue> SelectMaskVals;
 5980 for (int Lane : Srcs) {
 5981 if (Lane == -1)
 5982 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
 5983 else
 5984 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
 5985 }
 5986 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
 5987 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
 5988 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
 5989 SVN->getOperand(0), SVN->getOperand(1));
 5990
 5991 // Move all indices relative to the first source.
 5992 SmallVector<int> NewMask(Mask.size());
 5993 for (unsigned I = 0; I < Mask.size(); I++) {
 5994 if (Mask[I] == -1)
 5995 NewMask[I] = -1;
 5996 else
 5997 NewMask[I] = Mask[I] % Mask.size();
 5998 }
 5999
 // Single-source shuffle of the merged vector replaces the original
 // two-source shuffle.
 6000 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
 6001}
6002
6003/// Is this mask local (i.e. elements only move within their local span), and
6004/// repeating (that is, the same rearrangement is being done within each span)?
6005static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
6006 // Require a prefix from the original mask until the consumer code
6007 // is adjusted to rewrite the mask instead of just taking a prefix.
6008 for (auto [I, M] : enumerate(Mask)) {
6009 if (M == -1)
6010 continue;
6011 if ((M / Span) != (int)(I / Span))
6012 return false;
6013 int SpanIdx = I % Span;
6014 int Expected = M % Span;
6015 if (Mask[SpanIdx] != Expected)
6016 return false;
6017 }
6018 return true;
6019}
6020
6021/// Is this mask only using elements from the first span of the input?
6022static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
6023 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
6024}
6025
6026/// Return true for a mask which performs an arbitrary shuffle within the first
6027/// span, and then repeats that same result across all remaining spans. Note
6028/// that this doesn't check if all the inputs come from a single span!
6029static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
6030 // Require a prefix from the original mask until the consumer code
6031 // is adjusted to rewrite the mask instead of just taking a prefix.
6032 for (auto [I, M] : enumerate(Mask)) {
6033 if (M == -1)
6034 continue;
6035 int SpanIdx = I % Span;
6036 if (Mask[SpanIdx] != M)
6037 return false;
6038 }
6039 return true;
6040}
6041
6042/// Try to widen element type to get a new mask value for a better permutation
6043/// sequence. This doesn't try to inspect the widened mask for profitability;
6044/// we speculate the widened form is equal or better. This has the effect of
6045/// reducing mask constant sizes - allowing cheaper materialization sequences
6046/// - and index sequence sizes - reducing register pressure and materialization
6047/// cost, at the cost of (possibly) an extra VTYPE toggle.
6049 SDLoc DL(Op);
6050 MVT VT = Op.getSimpleValueType();
6051 MVT ScalarVT = VT.getVectorElementType();
6052 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
6053 SDValue V0 = Op.getOperand(0);
6054 SDValue V1 = Op.getOperand(1);
6055 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
6056
6057 // Avoid wasted work leading to isTypeLegal check failing below
6058 if (ElementSize > 32)
6059 return SDValue();
6060
6061 SmallVector<int, 8> NewMask;
6062 if (!widenShuffleMaskElts(Mask, NewMask))
6063 return SDValue();
6064
6065 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
6066 : MVT::getIntegerVT(ElementSize * 2);
6067 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
6068 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
6069 return SDValue();
6070 V0 = DAG.getBitcast(NewVT, V0);
6071 V1 = DAG.getBitcast(NewVT, V1);
6072 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
6073}
6074
6076 const RISCVSubtarget &Subtarget) {
6077 SDValue V1 = Op.getOperand(0);
6078 SDValue V2 = Op.getOperand(1);
6079 SDLoc DL(Op);
6080 MVT XLenVT = Subtarget.getXLenVT();
6081 MVT VT = Op.getSimpleValueType();
6082 unsigned NumElts = VT.getVectorNumElements();
6084
6085 if (VT.getVectorElementType() == MVT::i1) {
6086 // Lower to a vror.vi of a larger element type if possible before we promote
6087 // i1s to i8s.
6088 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6089 return V;
6090 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
6091 return V;
6092
6093 // Promote i1 shuffle to i8 shuffle.
6094 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
6095 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
6096 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
6097 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
6098 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
6099 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
6100 ISD::SETNE);
6101 }
6102
6103 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6104
6105 // Store the return value in a single variable instead of structured bindings
6106 // so that we can pass it to GetSlide below, which cannot capture structured
6107 // bindings until C++20.
6108 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6109 auto [TrueMask, VL] = TrueMaskVL;
6110
6111 if (SVN->isSplat()) {
6112 const int Lane = SVN->getSplatIndex();
6113 if (Lane >= 0) {
6114 MVT SVT = VT.getVectorElementType();
6115
6116 // Turn splatted vector load into a strided load with an X0 stride.
6117 SDValue V = V1;
6118 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
6119 // with undef.
6120 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
6121 int Offset = Lane;
6122 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
6123 int OpElements =
6124 V.getOperand(0).getSimpleValueType().getVectorNumElements();
6125 V = V.getOperand(Offset / OpElements);
6126 Offset %= OpElements;
6127 }
6128
6129 // We need to ensure the load isn't atomic or volatile.
6130 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
6131 auto *Ld = cast<LoadSDNode>(V);
6132 Offset *= SVT.getStoreSize();
6133 SDValue NewAddr = DAG.getMemBasePlusOffset(
6134 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
6135
6136 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
6137 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
6138 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6139 SDValue IntID =
6140 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
6141 SDValue Ops[] = {Ld->getChain(),
6142 IntID,
6143 DAG.getUNDEF(ContainerVT),
6144 NewAddr,
6145 DAG.getRegister(RISCV::X0, XLenVT),
6146 VL};
6147 SDValue NewLoad = DAG.getMemIntrinsicNode(
6148 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
6150 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
6151 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
6152 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
6153 }
6154
6155 MVT SplatVT = ContainerVT;
6156
6157 // f16 with zvfhmin and bf16 need to use an integer scalar load.
6158 if (SVT == MVT::bf16 ||
6159 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
6160 SVT = MVT::i16;
6161 SplatVT = ContainerVT.changeVectorElementType(SVT);
6162 }
6163
6164 // Otherwise use a scalar load and splat. This will give the best
6165 // opportunity to fold a splat into the operation. ISel can turn it into
6166 // the x0 strided load if we aren't able to fold away the select.
6167 if (SVT.isFloatingPoint())
6168 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
6169 Ld->getPointerInfo().getWithOffset(Offset),
6170 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
6171 else
6172 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
6173 Ld->getPointerInfo().getWithOffset(Offset), SVT,
6174 Ld->getBaseAlign(),
6175 Ld->getMemOperand()->getFlags());
6177
6178 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
6179 : RISCVISD::VMV_V_X_VL;
6180 SDValue Splat =
6181 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
6182 Splat = DAG.getBitcast(ContainerVT, Splat);
6183 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
6184 }
6185
6186 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6187 assert(Lane < (int)NumElts && "Unexpected lane!");
6188 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
6189 V1, DAG.getConstant(Lane, DL, XLenVT),
6190 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6191 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6192 }
6193 }
6194
6195 // For exact VLEN m2 or greater, try to split to m1 operations if we
6196 // can split cleanly.
6197 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
6198 return V;
6199
6200 ArrayRef<int> Mask = SVN->getMask();
6201
6202 if (SDValue V =
6203 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
6204 return V;
6205
6206 if (SDValue V =
6207 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
6208 return V;
6209
6210 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
6211 // available.
6212 if (Subtarget.hasStdExtZvkb())
6213 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6214 return V;
6215
6216 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
6217 NumElts != 2)
6218 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
6219
6220 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
6221 // use shift and truncate to perform the shuffle.
6222 // TODO: For Factor=6, we can perform the first step of the deinterleave via
6223 // shift-and-trunc reducing total cost for everything except an mf8 result.
6224 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
6225 // to do the entire operation.
6226 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6227 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6228 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6229 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
6230 unsigned Index = 0;
6231 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
6232 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6233 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
6234 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
6235 if (1 < count_if(Mask,
6236 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6237 1 < count_if(Mask, [&Mask](int Idx) {
6238 return Idx >= (int)Mask.size();
6239 })) {
6240 // Narrow each source and concatenate them.
6241 // FIXME: For small LMUL it is better to concatenate first.
6242 MVT EltVT = VT.getVectorElementType();
6243 auto EltCnt = VT.getVectorElementCount();
6244 MVT SubVT =
6245 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
6246
6247 SDValue Lo =
6248 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
6249 SDValue Hi =
6250 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
6251
6252 SDValue Concat =
6255 if (Factor == 2)
6256 return Concat;
6257
6258 SDValue Vec = DAG.getUNDEF(VT);
6259 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
6260 }
6261 }
6262 }
6263 }
6264
6265 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
6266 // e64 which can't match above.
6267 unsigned Index = 0;
6268 if (Subtarget.hasVendorXRivosVizip() &&
6270 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6271 unsigned Opc =
6272 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
6273 if (V2.isUndef())
6274 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6275 if (auto VLEN = Subtarget.getRealVLen();
6276 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
6277 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6278 if (SDValue Src = foldConcatVector(V1, V2)) {
6279 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6280 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6281 SDValue Res =
6282 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6283 return DAG.getExtractSubvector(DL, VT, Res, 0);
6284 }
6285 // Deinterleave each source and concatenate them, or concat first, then
6286 // deinterleave.
6287 if (1 < count_if(Mask,
6288 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6289 1 < count_if(Mask,
6290 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6291
6292 const unsigned EltSize = VT.getScalarSizeInBits();
6293 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6294 if (NumElts < MinVLMAX) {
6295 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6296 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6297 SDValue Res =
6298 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6299 return DAG.getExtractSubvector(DL, VT, Res, 0);
6300 }
6301
6302 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6303 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6304
6305 MVT SubVT = VT.getHalfNumVectorElementsVT();
6306 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6307 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6308 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6309 }
6310 }
6311
6312 if (SDValue V =
6313 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6314 return V;
6315
6316 // Detect an interleave shuffle and lower to
6317 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6318 int EvenSrc, OddSrc;
6319 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6320 !(NumElts == 2 &&
6321 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6322 // Extract the halves of the vectors.
6323 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6324
6325 // Recognize if one half is actually undef; the matching above will
6326 // otherwise reuse the even stream for the undef one. This improves
6327 // spread(2) shuffles.
6328 bool LaneIsUndef[2] = { true, true};
6329 for (const auto &[Idx, M] : enumerate(Mask))
6330 LaneIsUndef[Idx % 2] &= (M == -1);
6331
6332 int Size = Mask.size();
6333 SDValue EvenV, OddV;
6334 if (LaneIsUndef[0]) {
6335 EvenV = DAG.getUNDEF(HalfVT);
6336 } else {
6337 assert(EvenSrc >= 0 && "Undef source?");
6338 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6339 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6340 }
6341
6342 if (LaneIsUndef[1]) {
6343 OddV = DAG.getUNDEF(HalfVT);
6344 } else {
6345 assert(OddSrc >= 0 && "Undef source?");
6346 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6347 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6348 }
6349
6350 // Prefer vzip2a if available.
6351 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6352 if (Subtarget.hasVendorXRivosVizip()) {
6353 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6354 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6355 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6356 }
6357 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6358 }
6359
6360 // Recognize a pattern which can handled via a pair of vslideup/vslidedown
6361 // instructions (in any combination) with masking on the second instruction.
6362 // Also handles masked slides into an identity source, and single slides
6363 // without masking. Avoid matching bit rotates (which are not also element
6364 // rotates) as slide pairs. This is a performance heuristic, not a
6365 // functional check.
6366 std::array<std::pair<int, int>, 2> SrcInfo;
6367 unsigned RotateAmt;
6368 MVT RotateVT;
6369 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6370 (isElementRotate(SrcInfo, NumElts) ||
6371 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6372 SDValue Sources[2];
6373 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6374 int SrcIdx = Info.first;
6375 assert(SrcIdx == 0 || SrcIdx == 1);
6376 SDValue &Src = Sources[SrcIdx];
6377 if (!Src) {
6378 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6379 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6380 }
6381 return Src;
6382 };
6383 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6384 SDValue Passthru) {
6385 auto [TrueMask, VL] = TrueMaskVL;
6386 SDValue SrcV = GetSourceFor(Src);
6387 int SlideAmt = Src.second;
6388 if (SlideAmt == 0) {
6389 // Should never be second operation
6390 assert(Mask == TrueMask);
6391 return SrcV;
6392 }
6393 if (SlideAmt < 0)
6394 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6395 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6397 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6398 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6400 };
6401
6402 if (SrcInfo[1].first == -1) {
6403 SDValue Res = DAG.getUNDEF(ContainerVT);
6404 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6405 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6406 }
6407
6408 if (Subtarget.hasVendorXRivosVizip()) {
6409 bool TryWiden = false;
6410 unsigned Factor;
6411 if (isZipEven(SrcInfo, Mask, Factor)) {
6412 if (Factor == 1) {
6413 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6414 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6415 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6416 Subtarget);
6417 }
6418 TryWiden = true;
6419 }
6420 if (isZipOdd(SrcInfo, Mask, Factor)) {
6421 if (Factor == 1) {
6422 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6423 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6424 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6425 Subtarget);
6426 }
6427 TryWiden = true;
6428 }
6429 // If we found a widening oppurtunity which would let us form a
6430 // zipeven or zipodd, use the generic code to widen the shuffle
6431 // and recurse through this logic.
6432 if (TryWiden)
6433 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6434 return V;
6435 }
6436
6437 // Build the mask. Note that vslideup unconditionally preserves elements
6438 // below the slide amount in the destination, and thus those elements are
6439 // undefined in the mask. If the mask ends up all true (or undef), it
6440 // will be folded away by general logic.
6441 SmallVector<SDValue> MaskVals;
6442 for (const auto &[Idx, M] : enumerate(Mask)) {
6443 if (M < 0 ||
6444 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6445 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6446 continue;
6447 }
6448 int Src = M >= (int)NumElts;
6449 int Diff = (int)Idx - (M % NumElts);
6450 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6451 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6452 "Must match exactly one of the two slides");
6453 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6454 }
6455 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6456 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6457 SDValue SelectMask = convertToScalableVector(
6458 ContainerVT.changeVectorElementType(MVT::i1),
6459 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6460
6461 SDValue Res = DAG.getUNDEF(ContainerVT);
6462 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6463 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6464 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6465 }
6466
6467 // Handle any remaining single source shuffles
6468 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6469 if (V2.isUndef()) {
6470 // We might be able to express the shuffle as a bitrotate. But even if we
6471 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6472 // shifts and a vor will have a higher throughput than a vrgather.
6473 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6474 return V;
6475
6476 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6477 return V;
6478
6479 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6480 // is fully covered in interleave(2) above, so it is ignored here.
6481 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6482 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6483 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6484 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6485 unsigned Index;
6486 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6487 MVT NarrowVT =
6488 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6489 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6490 return getWideningSpread(Src, Factor, Index, DL, DAG);
6491 }
6492 }
6493 }
6494
6495 // If only a prefix of the source elements influence a prefix of the
6496 // destination elements, try to see if we can reduce the required LMUL
6497 unsigned MinVLen = Subtarget.getRealMinVLen();
6498 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6499 if (NumElts > MinVLMAX) {
6500 unsigned MaxIdx = 0;
6501 for (auto [I, M] : enumerate(Mask)) {
6502 if (M == -1)
6503 continue;
6504 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6505 }
6506 unsigned NewNumElts =
6507 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6508 if (NewNumElts != NumElts) {
6509 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6510 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6511 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6512 Mask.take_front(NewNumElts));
6513 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6514 }
6515 }
6516
6517 // Before hitting generic lowering fallbacks, try to widen the mask
6518 // to a wider SEW.
6519 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6520 return V;
6521
6522 // Can we generate a vcompress instead of a vrgather? These scale better
6523 // at high LMUL, at the cost of not being able to fold a following select
6524 // into them. The mask constants are also smaller than the index vector
6525 // constants, and thus easier to materialize.
6526 if (isCompressMask(Mask)) {
6527 SmallVector<SDValue> MaskVals(NumElts,
6528 DAG.getConstant(false, DL, XLenVT));
6529 for (auto Idx : Mask) {
6530 if (Idx == -1)
6531 break;
6532 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6533 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6534 }
6535 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6536 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6537 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6538 DAG.getUNDEF(VT));
6539 }
6540
6541 if (VT.getScalarSizeInBits() == 8 &&
6542 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6543 // On such a vector we're unable to use i8 as the index type.
6544 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6545 // may involve vector splitting if we're already at LMUL=8, or our
6546 // user-supplied maximum fixed-length LMUL.
6547 return SDValue();
6548 }
6549
6550 // Base case for the two operand recursion below - handle the worst case
6551 // single source shuffle.
6552 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6553 MVT IndexVT = VT.changeTypeToInteger();
6554 // Since we can't introduce illegal index types at this stage, use i16 and
6555 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6556 // than XLenVT.
6557 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6558 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6559 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6560 }
6561
6562 // If the mask allows, we can do all the index computation in 16 bits. This
6563 // requires less work and less register pressure at high LMUL, and creates
6564 // smaller constants which may be cheaper to materialize.
6565 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6566 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6567 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6568 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6569 }
6570
6571 MVT IndexContainerVT =
6572 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6573
6574 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6575 SmallVector<SDValue> GatherIndicesLHS;
6576 for (int MaskIndex : Mask) {
6577 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6578 GatherIndicesLHS.push_back(IsLHSIndex
6579 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6580 : DAG.getUNDEF(XLenVT));
6581 }
6582 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6583 LHSIndices =
6584 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6585 // At m1 and less, there's no point trying any of the high LMUL splitting
6586 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6587 if (NumElts <= MinVLMAX) {
6588 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6589 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6590 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6591 }
6592
6593 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6594 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6595 auto [InnerTrueMask, InnerVL] =
6596 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6597 int N =
6598 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6599 assert(isPowerOf2_32(N) && N <= 8);
6600
6601 // If we have a locally repeating mask, then we can reuse the first
6602 // register in the index register group for all registers within the
6603 // source register group. TODO: This generalizes to m2, and m4.
6604 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6605 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6606 SDValue Gather = DAG.getUNDEF(ContainerVT);
6607 for (int i = 0; i < N; i++) {
6608 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6609 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6610 SDValue SubVec =
6611 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6612 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6613 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6614 }
6615 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6616 }
6617
6618 // If we have a shuffle which only uses the first register in our source
6619 // register group, and repeats the same index across all spans, we can
6620 // use a single vrgather (and possibly some register moves).
6621 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6622 // which we can do a linear number of shuffles to form an m1 which
6623 // contains all the output elements.
6624 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6625 isSpanSplatShuffle(Mask, MinVLMAX)) {
6626 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6627 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6628 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6629 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6630 SDValue Gather = DAG.getUNDEF(ContainerVT);
6631 for (int i = 0; i < N; i++)
6632 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6633 M1VT.getVectorMinNumElements() * i);
6634 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6635 }
6636
6637 // If we have a shuffle which only uses the first register in our
6638 // source register group, we can do a linear number of m1 vrgathers
6639 // reusing the same source register (but with different indices)
6640 // TODO: This can be generalized for m2 or m4, or for any shuffle
6641 // for which we can do a vslidedown followed by this expansion.
6642 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6643 SDValue SlideAmt =
6644 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6645 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6646 SDValue Gather = DAG.getUNDEF(ContainerVT);
6647 for (int i = 0; i < N; i++) {
6648 if (i != 0)
6649 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6650 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6651 SlideAmt, TrueMask, VL);
6652 SDValue SubIndex =
6653 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6654 SDValue SubVec =
6655 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6656 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6657 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6658 M1VT.getVectorMinNumElements() * i);
6659 }
6660 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6661 }
6662
6663 // Fallback to generic vrgather if we can't find anything better.
6664 // On many machines, this will be O(LMUL^2)
6665 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6666 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6667 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6668 }
6669
6670 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6671 // merged with a second vrgather.
6672 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6673
6674 // Now construct the mask that will be used by the blended vrgather operation.
6675 // Construct the appropriate indices into each vector.
6676 for (int MaskIndex : Mask) {
6677 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6678 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6679 ? MaskIndex : -1);
6680 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6681 }
6682
6683 // If the mask indices are disjoint between the two sources, we can lower it
6684 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6685 // operands may end up being lowered to something cheaper than a vrgather.vv.
6686 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6687 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6688 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6689 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6690 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6691 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6692 return V;
6693
6694 // Before hitting generic lowering fallbacks, try to widen the mask
6695 // to a wider SEW.
6696 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6697 return V;
6698
6699 // Try to pick a profitable operand order.
6700 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6701 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6702
6703 // Recursively invoke lowering for each operand if we had two
6704 // independent single source shuffles, and then combine the result via a
6705 // vselect. Note that the vselect will likely be folded back into the
6706 // second permute (vrgather, or other) by the post-isel combine.
6707 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6708 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6709
6710 SmallVector<SDValue> MaskVals;
6711 for (int MaskIndex : Mask) {
6712 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6713 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6714 }
6715
6716 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6717 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6718 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6719
6720 if (SwapOps)
6721 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6722 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6723}
6724
// isShuffleMaskLegal: decide whether shuffle mask M on type VT is cheap
// enough on RISC-V to keep as a vector_shuffle rather than scalarize.
// NOTE(review): the function signature (content line 6725) and the condition
// guarding the early "return true" below (content line 6731 — presumably a
// splat-mask check, per the adjacent comment) are not visible in this
// extracted view; confirm against the full source.
6726 // Only support legal VTs for other shuffles for now.
6727 if (!isTypeLegal(VT))
6728 return false;
6729
6730 // Support splats for any type. These should type legalize well.
6732 return true;
6733
6734 const unsigned NumElts = M.size();
6735 MVT SVT = VT.getSimpleVT();
6736
6737 // Not for i1 vectors.
6738 if (SVT.getScalarType() == MVT::i1)
6739 return false;
6740
// Legal when the mask is a full reverse, a masked slide pair forming an
// element rotate, or an interleave the subtarget supports.
6741 std::array<std::pair<int, int>, 2> SrcInfo;
6742 int Dummy1, Dummy2;
6743 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6744 (::isMaskedSlidePair(M, SrcInfo) &&
6745 isElementRotate(SrcInfo, NumElts)) ||
6746 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6747 }
6748
6749 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6750 // the exponent.
// Handles both the plain ISD opcodes and their VP_* counterparts: when the
// node is a VP op, Mask/VL come from the node's operands; otherwise they are
// synthesized (or unused) below.
6751 SDValue
6752 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6753 SelectionDAG &DAG) const {
6754 MVT VT = Op.getSimpleValueType();
6755 unsigned EltSize = VT.getScalarSizeInBits();
6756 SDValue Src = Op.getOperand(0);
6757 SDLoc DL(Op);
6758 MVT ContainerVT = VT;
6759
6760 SDValue Mask, VL;
6761 if (Op->isVPOpcode()) {
6762 Mask = Op.getOperand(1);
6763 if (VT.isFixedLengthVector())
6764 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6765 Subtarget);
6766 VL = Op.getOperand(2);
6767 }
6768
6769 // We choose FP type that can represent the value if possible. Otherwise, we
6770 // use rounding to zero conversion for correct exponent of the result.
6771 // TODO: Use f16 for i8 when possible?
6772 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6773 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6774 FloatEltVT = MVT::f32;
6775 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6776
6777 // Legal types should have been checked in the RISCVTargetLowering
6778 // constructor.
6779 // TODO: Splitting may make sense in some cases.
6780 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6781 "Expected legal float type!");
6782
6783 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6784 // The trailing zero count is equal to log2 of this single bit value.
6785 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6786 SDValue Neg = DAG.getNegative(Src, DL, VT);
6787 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6788 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
// VP form of the same X & -X trick: negate via 0 - X under Mask/VL.
6789 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6790 Src, Mask, VL);
6791 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6792 }
6793
6794 // We have a legal FP type, convert to it.
6795 SDValue FloatVal;
6796 if (FloatVT.bitsGT(VT)) {
// FP type is wider than the integer: the conversion is exact, a plain
// unsigned int-to-fp suffices.
6797 if (Op->isVPOpcode())
6798 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6799 else
6800 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6801 } else {
6802 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6803 if (VT.isFixedLengthVector()) {
6804 ContainerVT = getContainerForFixedLengthVector(VT);
6805 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6806 }
6807 if (!Op->isVPOpcode())
6808 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6809 SDValue RTZRM =
6810 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6811 MVT ContainerFloatVT =
6812 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6813 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6814 Src, Mask, RTZRM, VL);
6815 if (VT.isFixedLengthVector())
6816 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6817 }
6818 // Bitcast to integer and shift the exponent to the LSB.
6819 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6820 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
// 52 / 23 = explicit mantissa width of IEEE-754 binary64 / binary32;
// shifting right by it leaves the biased exponent in the low bits.
6821 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6822
6823 SDValue Exp;
6824 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6825 if (Op->isVPOpcode()) {
6826 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6827 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6828 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6829 } else {
6830 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6831 DAG.getConstant(ShiftAmt, DL, IntVT));
6832 if (IntVT.bitsLT(VT))
6833 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6834 else if (IntVT.bitsGT(VT))
6835 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6836 }
6837
6838 // The exponent contains log2 of the value in biased form.
// 1023 / 127 = IEEE-754 exponent bias for binary64 / binary32.
6839 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6840 // For trailing zeros, we just need to subtract the bias.
6841 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6842 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6843 DAG.getConstant(ExponentBias, DL, VT));
6844 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6845 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6846 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6847
6848 // For leading zeros, we need to remove the bias and convert from log2 to
6849 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
6850 unsigned Adjust = ExponentBias + (EltSize - 1);
6851 SDValue Res;
6852 if (Op->isVPOpcode())
6853 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6854 Mask, VL);
6855 else
6856 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6857
6858 // The above result with zero input equals to Adjust which is greater than
6859 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
6860 if (Op.getOpcode() == ISD::CTLZ)
6861 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6862 else if (Op.getOpcode() == ISD::VP_CTLZ)
6863 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6864 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6865 return Res;
6866 }
6867
6868SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6869 SelectionDAG &DAG) const {
6870 SDLoc DL(Op);
6871 MVT XLenVT = Subtarget.getXLenVT();
6872 SDValue Source = Op->getOperand(0);
6873 MVT SrcVT = Source.getSimpleValueType();
6874 SDValue Mask = Op->getOperand(1);
6875 SDValue EVL = Op->getOperand(2);
6876
6877 if (SrcVT.isFixedLengthVector()) {
6878 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6879 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6880 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6881 Subtarget);
6882 SrcVT = ContainerVT;
6883 }
6884
6885 // Convert to boolean vector.
6886 if (SrcVT.getScalarType() != MVT::i1) {
6887 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6888 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6889 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6890 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6891 DAG.getUNDEF(SrcVT), Mask, EVL});
6892 }
6893
6894 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6895 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6896 // In this case, we can interpret poison as -1, so nothing to do further.
6897 return Res;
6898
6899 // Convert -1 to VL.
6900 SDValue SetCC =
6901 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6902 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6903 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6904}
6905
6906 // While RVV has alignment restrictions, we should always be able to load as a
6907 // legal equivalently-sized byte-typed vector instead. This method is
6908 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
6909 // the load is already correctly-aligned, it returns SDValue().
6910 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6911 SelectionDAG &DAG) const {
6912 auto *Load = cast<LoadSDNode>(Op);
6913 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6914
// NOTE(review): content line 6915 — the start of the condition these two
// continuation lines belong to (presumably an allowsMemoryAccessForAlignment
// style check that bails out when the access is already permitted) — is
// missing from this extracted view; confirm against the full source.
6916 Load->getMemoryVT(),
6917 *Load->getMemOperand()))
6918 return SDValue();
6919
6920 SDLoc DL(Op);
6921 MVT VT = Op.getSimpleValueType();
// Only 16/32/64-bit element types are expected here; re-express the load as
// an i8 vector with (EltSizeBits / 8) times as many elements.
6922 unsigned EltSizeBits = VT.getScalarSizeInBits();
6923 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6924 "Unexpected unaligned RVV load type");
6925 MVT NewVT =
6926 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6927 assert(NewVT.isValid() &&
6928 "Expecting equally-sized RVV vector types to be legal");
// Load as bytes, then bitcast back; chain (value 1) is forwarded unchanged.
6929 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6930 Load->getPointerInfo(), Load->getBaseAlign(),
6931 Load->getMemOperand()->getFlags());
6932 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6933 }
6934
6935 // While RVV has alignment restrictions, we should always be able to store as a
6936 // legal equivalently-sized byte-typed vector instead. This method is
6937 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
6938 // returns SDValue() if the store is already correctly aligned.
6939 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6940 SelectionDAG &DAG) const {
6941 auto *Store = cast<StoreSDNode>(Op);
6942 assert(Store && Store->getValue().getValueType().isVector() &&
6943 "Expected vector store");
6944
// NOTE(review): content line 6945 — the start of the condition these two
// continuation lines belong to (presumably an allowsMemoryAccessForAlignment
// style check that bails out when the access is already permitted) — is
// missing from this extracted view; confirm against the full source.
6946 Store->getMemoryVT(),
6947 *Store->getMemOperand()))
6948 return SDValue();
6949
6950 SDLoc DL(Op);
6951 SDValue StoredVal = Store->getValue();
6952 MVT VT = StoredVal.getSimpleValueType();
// Only 16/32/64-bit element types are expected; re-express the store as an
// i8 vector with (EltSizeBits / 8) times as many elements.
6953 unsigned EltSizeBits = VT.getScalarSizeInBits();
6954 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6955 "Unexpected unaligned RVV store type");
6956 MVT NewVT =
6957 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6958 assert(NewVT.isValid() &&
6959 "Expecting equally-sized RVV vector types to be legal");
// Bitcast the value to bytes and store it with the original operands.
6960 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6961 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6962 Store->getPointerInfo(), Store->getBaseAlign(),
6963 Store->getMemOperand()->getFlags());
6964 }
6965
6966 // While RVV has alignment restrictions, we should always be able to load as a
6967 // legal equivalently-sized byte-typed vector instead. This method is
6968 // responsible for re-expressing a ISD::VP_LOAD via a correctly-aligned type. If
6969 // the load is already correctly-aligned, it returns SDValue().
6970 SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
6971 SelectionDAG &DAG) const {
6972 auto *Load = cast<VPLoadSDNode>(Op);
6973 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6974
// NOTE(review): content line 6975 — the start of the alignment-allowance
// condition these two continuation lines belong to — is missing from this
// extracted view; confirm against the full source.
6976 Load->getMemoryVT(),
6977 *Load->getMemOperand()))
6978 return SDValue();
6979
6980 SDValue Mask = Load->getMask();
6981
6982 // FIXME: Handled masked loads somehow.
// NOTE(review): content line 6983 — the condition guarding this early
// return (presumably an "is the mask all-ones" test, per the FIXME) — is
// missing from this extracted view; confirm against the full source.
6984 return SDValue();
6985
6986 SDLoc DL(Op);
6987 MVT VT = Op.getSimpleValueType();
6988 unsigned EltSizeBits = VT.getScalarSizeInBits();
6989 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6990 "Unexpected unaligned RVV load type");
6991 MVT NewVT =
6992 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6993 assert(NewVT.isValid() &&
6994 "Expecting equally-sized RVV vector types to be legal");
6995
// Scale the vector length to count bytes instead of original elements.
6996 SDValue VL = Load->getVectorLength();
6997 VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
6998 DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
6999
// Reload as an all-ones-masked byte VP load, then bitcast back; the chain
// (value 1) is forwarded unchanged.
7000 MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
7001 SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
7002 DAG.getAllOnesConstant(DL, MaskVT), VL,
7003 Load->getPointerInfo(), Load->getBaseAlign(),
7004 Load->getMemOperand()->getFlags(), AAMDNodes());
7005 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
7006 }
7007
7008 // While RVV has alignment restrictions, we should always be able to store as a
7009 // legal equivalently-sized byte-typed vector instead. This method is
7010 // responsible for re-expressing a ISD::VP STORE via a correctly-aligned type.
7011 // It returns SDValue() if the store is already correctly aligned.
7012 SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
7013 SelectionDAG &DAG) const {
7014 auto *Store = cast<VPStoreSDNode>(Op);
7015 assert(Store && Store->getValue().getValueType().isVector() &&
7016 "Expected vector store");
7017
// NOTE(review): content line 7018 — the start of the alignment-allowance
// condition these two continuation lines belong to — is missing from this
// extracted view; confirm against the full source.
7019 Store->getMemoryVT(),
7020 *Store->getMemOperand()))
7021 return SDValue();
7022
7023 SDValue Mask = Store->getMask();
7024
7025 // FIXME: Handled masked stores somehow.
// NOTE(review): content line 7026 — the condition guarding this early
// return (presumably an "is the mask all-ones" test, per the FIXME) — is
// missing from this extracted view; confirm against the full source.
7027 return SDValue();
7028
7029 SDLoc DL(Op);
7030 SDValue StoredVal = Store->getValue();
7031 MVT VT = StoredVal.getSimpleValueType();
7032 unsigned EltSizeBits = VT.getScalarSizeInBits();
7033 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
7034 "Unexpected unaligned RVV store type");
7035 MVT NewVT =
7036 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
7037 assert(NewVT.isValid() &&
7038 "Expecting equally-sized RVV vector types to be legal");
7039
// Scale the vector length to count bytes instead of original elements.
7040 SDValue VL = Store->getVectorLength();
7041 VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
7042 DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
7043
7044 StoredVal = DAG.getBitcast(NewVT, StoredVal);
7045
// Build a fresh memory operand with the byte-typed size but the original
// pointer info, flags, and alignment.
7046 LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
7047 MachineFunction &MF = DAG.getMachineFunction();
7048 MachineMemOperand *MMO = MF.getMachineMemOperand(
7049 Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
7050 Store->getBaseAlign());
7051
7052 MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
7053 return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
7054 DAG.getUNDEF(Store->getBasePtr().getValueType()),
7055 DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO,
// NOTE(review): content line 7056 — the trailing arguments of this
// getStoreVP call (addressing mode / truncating flags) — is missing from
// this extracted view; confirm against the full source.
7057 }
7058
// Lowering for a large i64 constant: decide whether to keep the constant
// (materialize with an instruction sequence) or return SDValue() to force
// constant-pool lowering.
// NOTE(review): the function signature (content line 7059) is not visible in
// this extracted view; the body shows it takes Op, DAG, and Subtarget.
7060 const RISCVSubtarget &Subtarget) {
7061 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
7062
7063 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
7064
7065 // All simm32 constants should be handled by isel.
7066 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
7067 // this check redundant, but small immediates are common so this check
7068 // should have better compile time.
7069 if (isInt<32>(Imm))
7070 return Op;
7071
7072 // We only need to cost the immediate, if constant pool lowering is enabled.
7073 if (!Subtarget.useConstantPoolForLargeInts())
7074 return Op;
7075
// NOTE(review): content line 7076 — the declaration of `Seq` used below
// (presumably the RISCVMatInt materialization sequence for Imm) — is
// missing from this extracted view; confirm against the full source.
7077 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
7078 return Op;
7079
7080 // Optimizations below are disabled for opt size. If we're optimizing for
7081 // size, use a constant pool.
7082 if (DAG.shouldOptForSize())
7083 return SDValue();
7084
7085 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
7086 // that if it will avoid a constant pool.
7087 // It will require an extra temporary register though.
7088 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
7089 // low and high 32 bits are the same and bit 31 and 63 are set.
7090 unsigned ShiftAmt, AddOpc;
7091 RISCVMatInt::InstSeq SeqLo =
7092 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
// +2 accounts for the extra shift and add combining the two halves.
7093 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
7094 return Op;
7095
7096 return SDValue();
7097 }
7098
7099SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
7100 SelectionDAG &DAG) const {
7101 MVT VT = Op.getSimpleValueType();
7102 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
7103
7104 // Can this constant be selected by a Zfa FLI instruction?
7105 bool Negate = false;
7106 int Index = getLegalZfaFPImm(Imm, VT);
7107
7108 // If the constant is negative, try negating.
7109 if (Index < 0 && Imm.isNegative()) {
7110 Index = getLegalZfaFPImm(-Imm, VT);
7111 Negate = true;
7112 }
7113
7114 // If we couldn't find a FLI lowering, fall back to generic code.
7115 if (Index < 0)
7116 return SDValue();
7117
7118 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
7119 SDLoc DL(Op);
7120 SDValue Const =
7121 DAG.getNode(RISCVISD::FLI, DL, VT,
7122 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
7123 if (!Negate)
7124 return Const;
7125
7126 return DAG.getNode(ISD::FNEG, DL, VT, Const);
7127}
7128
// Prefetch lowering fragment.
// NOTE(review): the function signature (content line 7129) is not visible in
// this extracted view; the body shows it takes Op, Subtarget, and DAG.
// Operand 4 of the prefetch node is its "is data" flag.
7130 SelectionDAG &DAG) {
7131
7132 unsigned IsData = Op.getConstantOperandVal(4);
7133
7134 // mips-p8700 we support data prefetch for now.
// With the XMIPSCBOP vendor extension, a non-data (instruction) prefetch is
// dropped entirely: only the incoming chain is returned.
7135 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
7136 return Op.getOperand(0);
7137 return Op;
7138 }
7139
// Atomic-fence lowering fragment: operand 1 carries the AtomicOrdering and
// operand 2 the SyncScope of the fence.
// NOTE(review): the function signature (content line 7140) is not visible in
// this extracted view; the body shows it takes Op, DAG, and Subtarget.
7141 const RISCVSubtarget &Subtarget) {
7142 SDLoc dl(Op);
7143 AtomicOrdering FenceOrdering =
7144 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
7145 SyncScope::ID FenceSSID =
7146 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
7147
// Under Ztso (total store ordering), almost all fences become no-ops.
7148 if (Subtarget.hasStdExtZtso()) {
7149 // The only fence that needs an instruction is a sequentially-consistent
7150 // cross-thread fence.
7151 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
7152 FenceSSID == SyncScope::System)
7153 return Op;
7154
7155 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
7156 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
7157 }
7158
7159 // singlethread fences only synchronize with signal handlers on the same
7160 // thread and thus only need to preserve instruction order, not actually
7161 // enforce memory ordering.
7162 if (FenceSSID == SyncScope::SingleThread)
7163 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
7164 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
7165
// Any other fence is left as-is for instruction selection.
7166 return Op;
7167 }
7168
// LowerIS_FPCLASS: lower ISD::IS_FPCLASS / ISD::VP_IS_FPCLASS by translating
// the fcXxx test mask (operand 1) into the RISC-V fclass/vfclass bit mask,
// classifying, and testing the result with AND/compare.
7169 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
7170 SelectionDAG &DAG) const {
7171 SDLoc DL(Op);
7172 MVT VT = Op.getSimpleValueType();
7173 MVT XLenVT = Subtarget.getXLenVT();
7174 unsigned Check = Op.getConstantOperandVal(1);
7175 unsigned TDCMask = 0;
7176 if (Check & fcSNan)
7177 TDCMask |= RISCV::FPMASK_Signaling_NaN;
7178 if (Check & fcQNan)
7179 TDCMask |= RISCV::FPMASK_Quiet_NaN;
// NOTE(review): the FPMASK_* accumulation lines for the six tests below
// (content lines 7181, 7183, 7185, 7187, 7189, 7191 — presumably
// FPMASK_Positive_Infinity .. FPMASK_Negative_Subnormal) are missing from
// this extracted view; confirm against the full source.
7180 if (Check & fcPosInf)
7182 if (Check & fcNegInf)
7184 if (Check & fcPosNormal)
7186 if (Check & fcNegNormal)
7188 if (Check & fcPosSubnormal)
7190 if (Check & fcNegSubnormal)
7192 if (Check & fcPosZero)
7193 TDCMask |= RISCV::FPMASK_Positive_Zero;
7194 if (Check & fcNegZero)
7195 TDCMask |= RISCV::FPMASK_Negative_Zero;
7196
// A single-bit mask lets us use an equality compare instead of AND + SETNE.
7197 bool IsOneBitMask = isPowerOf2_32(TDCMask);
7198
7199 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
7200
7201 if (VT.isVector()) {
7202 SDValue Op0 = Op.getOperand(0);
7203 MVT VT0 = Op.getOperand(0).getSimpleValueType();
7204
7205 if (VT.isScalableVector()) {
7206 MVT DstVT = VT0.changeVectorElementTypeToInteger();
7207 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
// The VP form supplies its own mask and EVL as operands 2 and 3.
7208 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
7209 Mask = Op.getOperand(2);
7210 VL = Op.getOperand(3);
7211 }
7212 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
7213 VL, Op->getFlags());
7214 if (IsOneBitMask)
7215 return DAG.getSetCC(DL, VT, FPCLASS,
7216 DAG.getConstant(TDCMask, DL, DstVT),
// NOTE(review): content line 7217 — the condition-code argument of this
// getSetCC (presumably ISD::SETEQ for the one-bit-mask case) — is missing
// from this extracted view; confirm against the full source.
7218 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
7219 DAG.getConstant(TDCMask, DL, DstVT));
7220 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
7221 ISD::SETNE);
7222 }
7223
// Fixed-length vectors: classify inside the scalable container types.
7224 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
7225 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7226 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
7227 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
7228 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
7229 Mask = Op.getOperand(2);
7230 MVT MaskContainerVT =
7231 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7232 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7233 VL = Op.getOperand(3);
7234 }
7235 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
7236
7237 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
7238 Mask, VL, Op->getFlags());
7239
// Splat the class mask and compare / AND it against the per-lane classes.
7240 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
7241 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
7242 if (IsOneBitMask) {
7243 SDValue VMSEQ =
7244 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
7245 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
7246 DAG.getUNDEF(ContainerVT), Mask, VL});
7247 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
7248 }
7249 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
7250 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
7251
7252 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7253 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
7254 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
7255
7256 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
7257 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
7258 DAG.getUNDEF(ContainerVT), Mask, VL});
7259 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
7260 }
7261
// Scalar path: fclass, AND with the mask, compare against zero.
7262 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
7263 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
7264 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
// NOTE(review): content line 7265 — the condition-code argument of this
// getSetCC (presumably ISD::SETNE) — is missing from this extracted view;
// confirm against the full source.
7266 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7267 }
7268
7269 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
7270 // operations propagate nans.
// NOTE(review): the function signature (content line 7271) is not visible in
// this extracted view; the body shows it takes Op, DAG, and Subtarget.
7272 const RISCVSubtarget &Subtarget) {
7273 SDLoc DL(Op);
7274 MVT VT = Op.getSimpleValueType();
7275
7276 SDValue X = Op.getOperand(0);
7277 SDValue Y = Op.getOperand(1);
7278
// Scalar path: fix up the operands so NaNs propagate, then use FMAX/FMIN.
7279 if (!VT.isVector()) {
7280 MVT XLenVT = Subtarget.getXLenVT();
7281
7282 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
7283 // ensures that when one input is a nan, the other will also be a nan
7284 // allowing the nan to propagate. If both inputs are nan, this will swap the
7285 // inputs which is harmless.
7286
7287 SDValue NewY = Y;
// x == x (ordered-equal self compare) is false exactly when x is a NaN.
7288 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
7289 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
7290 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
7291 }
7292
7293 SDValue NewX = X;
7294 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
7295 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
7296 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
7297 }
7298
7299 unsigned Opc =
7300 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
7301 return DAG.getNode(Opc, DL, VT, NewX, NewY);
7302 }
7303
7304 // Check no NaNs before converting to fixed vector scalable.
7305 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
7306 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
7307
7308 MVT ContainerVT = VT;
7309 if (VT.isFixedLengthVector()) {
7310 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
7311 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
7312 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
7313 }
7314
// VP ops carry their own mask (operand 2) and EVL (operand 3).
7315 SDValue Mask, VL;
7316 if (Op->isVPOpcode()) {
7317 Mask = Op.getOperand(2);
7318 if (VT.isFixedLengthVector())
7319 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
7320 Subtarget);
7321 VL = Op.getOperand(3);
7322 } else {
7323 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7324 }
7325
// Vector form of the same NaN-propagation fix-up: merge the NaN operand
// into the other side lane-by-lane via SETCC_VL + VMERGE_VL.
7326 SDValue NewY = Y;
7327 if (!XIsNeverNan) {
7328 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7329 {X, X, DAG.getCondCode(ISD::SETOEQ),
7330 DAG.getUNDEF(ContainerVT), Mask, VL});
7331 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
7332 DAG.getUNDEF(ContainerVT), VL);
7333 }
7334
7335 SDValue NewX = X;
7336 if (!YIsNeverNan) {
7337 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7338 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
7339 DAG.getUNDEF(ContainerVT), Mask, VL});
7340 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
7341 DAG.getUNDEF(ContainerVT), VL);
7342 }
7343
7344 unsigned Opc =
7345 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
7346 ? RISCVISD::VFMAX_VL
7347 : RISCVISD::VFMIN_VL;
7348 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
7349 DAG.getUNDEF(ContainerVT), Mask, VL);
7350 if (VT.isFixedLengthVector())
7351 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
7352 return Res;
7353 }
7354
// FABS/FNEG lowering for f16/bf16: move the half value into an integer
// register, apply a bitmask there, and move it back.
// NOTE(review): the function signature (content line 7355) is not visible in
// this extracted view; the body shows it takes Op, DAG, and Subtarget.
7356 const RISCVSubtarget &Subtarget) {
7357 bool IsFABS = Op.getOpcode() == ISD::FABS;
7358 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
7359 "Wrong opcode for lowering FABS or FNEG.");
7360
7361 MVT XLenVT = Subtarget.getXLenVT();
7362 MVT VT = Op.getSimpleValueType();
7363 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7364
7365 SDLoc DL(Op);
7366 SDValue Fmv =
7367 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
7368
// FABS: AND with 0x7fff (clear bit 15). FNEG: XOR with 0x8000 (flip bit 15).
// The 16-bit mask is sign-extended to XLEN to match the GPR value.
7369 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
7370 Mask = Mask.sext(Subtarget.getXLen());
7371
7372 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7373 SDValue Logic =
7374 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7375 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7376 }
7377
// FCOPYSIGN lowering for f16/bf16 magnitude: extract the sign bit of the
// (possibly wider) Sign operand in an integer register, align it to bit 15,
// and OR it into the sign-cleared integer image of Mag.
// NOTE(review): the function signature (content line 7378) is not visible in
// this extracted view; the body shows it takes Op, DAG, and Subtarget.
7379 const RISCVSubtarget &Subtarget) {
7380 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7381
7382 MVT XLenVT = Subtarget.getXLenVT();
7383 MVT VT = Op.getSimpleValueType();
7384 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7385
7386 SDValue Mag = Op.getOperand(0);
7387 SDValue Sign = Op.getOperand(1);
7388
7389 SDLoc DL(Op);
7390
7391 // Get sign bit into an integer value.
7392 unsigned SignSize = Sign.getValueSizeInBits();
7393 SDValue SignAsInt = [&]() {
7394 if (SignSize == Subtarget.getXLen())
7395 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7396 switch (SignSize) {
7397 case 16:
7398 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7399 case 32:
7400 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7401 case 64: {
// f64 on RV32: split into two i32 halves; the sign lives in the high half.
7402 assert(XLenVT == MVT::i32 && "Unexpected type");
7403 // Copy the upper word to integer.
7404 SignSize = 32;
7405 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7406 .getValue(1);
7407 }
7408 default:
7409 llvm_unreachable("Unexpected sign size");
7410 }
7411 }();
7412
7413 // Get the signbit at the right position for MagAsInt.
// Shift right when Sign is wider than Mag's 16 bits, left when narrower.
7414 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7415 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7416 SignAsInt,
7417 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7418
7419 // Mask the sign bit and any bits above it. The extra bits will be dropped
7420 // when we convert back to FP.
7421 SDValue SignMask = DAG.getConstant(
7422 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7423 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7424
7425 // Transform Mag value to integer, and clear the sign bit.
7426 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7427 SDValue ClearSignMask = DAG.getConstant(
7428 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7429 SDValue ClearedSign =
7430 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7431
7432 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
// NOTE(review): content line 7433 — the trailing argument of this OR
// (presumably node flags such as a disjoint hint) — is missing from this
// extracted view; confirm against the full source.
7434
7435 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7436 }
7437
7438/// Get a RISC-V target specified VL op for a given SDNode.
7439static unsigned getRISCVVLOp(SDValue Op) {
// OP_CASE maps a plain ISD opcode to the corresponding RISCVISD::*_VL node;
// VP_CASE does the same for the ISD::VP_* variant of the opcode.
7440#define OP_CASE(NODE) \
7441 case ISD::NODE: \
7442 return RISCVISD::NODE##_VL;
7443#define VP_CASE(NODE) \
7444 case ISD::VP_##NODE: \
7445 return RISCVISD::NODE##_VL;
7446 // clang-format off
7447 switch (Op.getOpcode()) {
7448 default:
7449 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7450 OP_CASE(ADD)
7451 OP_CASE(SUB)
7452 OP_CASE(MUL)
7453 OP_CASE(MULHS)
7454 OP_CASE(MULHU)
7455 OP_CASE(SDIV)
7456 OP_CASE(SREM)
7457 OP_CASE(UDIV)
7458 OP_CASE(UREM)
7459 OP_CASE(SHL)
7460 OP_CASE(SRA)
7461 OP_CASE(SRL)
7462 OP_CASE(ROTL)
7463 OP_CASE(ROTR)
7464 OP_CASE(BSWAP)
7465 OP_CASE(CTTZ)
7466 OP_CASE(CTLZ)
7467 OP_CASE(CTPOP)
7468 OP_CASE(BITREVERSE)
7469 OP_CASE(SADDSAT)
7470 OP_CASE(UADDSAT)
7471 OP_CASE(SSUBSAT)
7472 OP_CASE(USUBSAT)
7473 OP_CASE(AVGFLOORS)
7474 OP_CASE(AVGFLOORU)
7475 OP_CASE(AVGCEILS)
7476 OP_CASE(AVGCEILU)
7477 OP_CASE(FADD)
7478 OP_CASE(FSUB)
7479 OP_CASE(FMUL)
7480 OP_CASE(FDIV)
7481 OP_CASE(FNEG)
7482 OP_CASE(FABS)
7483 OP_CASE(FCOPYSIGN)
7484 OP_CASE(FSQRT)
7485 OP_CASE(SMIN)
7486 OP_CASE(SMAX)
7487 OP_CASE(UMIN)
7488 OP_CASE(UMAX)
7489 OP_CASE(STRICT_FADD)
7490 OP_CASE(STRICT_FSUB)
7491 OP_CASE(STRICT_FMUL)
7492 OP_CASE(STRICT_FDIV)
7493 OP_CASE(STRICT_FSQRT)
7494 VP_CASE(ADD) // VP_ADD
7495 VP_CASE(SUB) // VP_SUB
7496 VP_CASE(MUL) // VP_MUL
7497 VP_CASE(SDIV) // VP_SDIV
7498 VP_CASE(SREM) // VP_SREM
7499 VP_CASE(UDIV) // VP_UDIV
7500 VP_CASE(UREM) // VP_UREM
7501 VP_CASE(SHL) // VP_SHL
7502 VP_CASE(FADD) // VP_FADD
7503 VP_CASE(FSUB) // VP_FSUB
7504 VP_CASE(FMUL) // VP_FMUL
7505 VP_CASE(FDIV) // VP_FDIV
7506 VP_CASE(FNEG) // VP_FNEG
7507 VP_CASE(FABS) // VP_FABS
7508 VP_CASE(SMIN) // VP_SMIN
7509 VP_CASE(SMAX) // VP_SMAX
7510 VP_CASE(UMIN) // VP_UMIN
7511 VP_CASE(UMAX) // VP_UMAX
7512 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7513 VP_CASE(SETCC) // VP_SETCC
7514 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7515 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7516 VP_CASE(BITREVERSE) // VP_BITREVERSE
7517 VP_CASE(SADDSAT) // VP_SADDSAT
7518 VP_CASE(UADDSAT) // VP_UADDSAT
7519 VP_CASE(SSUBSAT) // VP_SSUBSAT
7520 VP_CASE(USUBSAT) // VP_USUBSAT
7521 VP_CASE(BSWAP) // VP_BSWAP
7522 VP_CASE(CTLZ) // VP_CTLZ
7523 VP_CASE(CTTZ) // VP_CTTZ
7524 VP_CASE(CTPOP) // VP_CTPOP
// The ZERO_UNDEF count variants share the plain CTLZ/CTTZ VL nodes.
7526 case ISD::VP_CTLZ_ZERO_UNDEF:
7527 return RISCVISD::CTLZ_VL;
7529 case ISD::VP_CTTZ_ZERO_UNDEF:
7530 return RISCVISD::CTTZ_VL;
7531 case ISD::FMA:
7532 case ISD::VP_FMA:
7533 return RISCVISD::VFMADD_VL;
7534 case ISD::STRICT_FMA:
7535 return RISCVISD::STRICT_VFMADD_VL;
// Bitwise logic on i1 vectors selects the VMAND/VMOR/VMXOR mask nodes
// instead of the element-wise *_VL forms.
7536 case ISD::AND:
7537 case ISD::VP_AND:
7538 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7539 return RISCVISD::VMAND_VL;
7540 return RISCVISD::AND_VL;
7541 case ISD::OR:
7542 case ISD::VP_OR:
7543 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7544 return RISCVISD::VMOR_VL;
7545 return RISCVISD::OR_VL;
7546 case ISD::XOR:
7547 case ISD::VP_XOR:
7548 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7549 return RISCVISD::VMXOR_VL;
7550 return RISCVISD::XOR_VL;
// ANY_EXTEND is lowered the same way as zero-extend.
7551 case ISD::ANY_EXTEND:
7552 case ISD::ZERO_EXTEND:
7553 return RISCVISD::VZEXT_VL;
7554 case ISD::SIGN_EXTEND:
7555 return RISCVISD::VSEXT_VL;
7556 case ISD::SETCC:
7557 return RISCVISD::SETCC_VL;
7558 case ISD::VSELECT:
7559 return RISCVISD::VMERGE_VL;
7560 case ISD::VP_SELECT:
7561 case ISD::VP_MERGE:
7562 return RISCVISD::VMERGE_VL;
7563 case ISD::VP_SRA:
7564 return RISCVISD::SRA_VL;
7565 case ISD::VP_SRL:
7566 return RISCVISD::SRL_VL;
7567 case ISD::VP_SQRT:
7568 return RISCVISD::FSQRT_VL;
7569 case ISD::VP_SIGN_EXTEND:
7570 return RISCVISD::VSEXT_VL;
7571 case ISD::VP_ZERO_EXTEND:
7572 return RISCVISD::VZEXT_VL;
7573 case ISD::VP_FP_TO_SINT:
7574 return RISCVISD::VFCVT_RTZ_X_F_VL;
7575 case ISD::VP_FP_TO_UINT:
7576 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7577 case ISD::FMINNUM:
7578 case ISD::FMINIMUMNUM:
7579 case ISD::VP_FMINNUM:
7580 return RISCVISD::VFMIN_VL;
7581 case ISD::FMAXNUM:
7582 case ISD::FMAXIMUMNUM:
7583 case ISD::VP_FMAXNUM:
7584 return RISCVISD::VFMAX_VL;
7585 case ISD::LRINT:
7586 case ISD::VP_LRINT:
7587 case ISD::LLRINT:
7588 case ISD::VP_LLRINT:
7589 return RISCVISD::VFCVT_RM_X_F_VL;
7590 }
7591 // clang-format on
7592#undef OP_CASE
7593#undef VP_CASE
7594}
7595
7597 const RISCVSubtarget &Subtarget) {
// True when Op has type nxv32f16/nxv32bf16 and will be promoted to f32 for
// execution: promoting the full type would need an nxv32f32 result (wider
// than any legal RVV type), so the op must be split in half first.
7598 return (Op.getValueType() == MVT::nxv32f16 &&
// f16 is promoted when only the "minimal" (Zvfhmin-style) vector f16
// support is available, i.e. no full Zvfh.
7599 (Subtarget.hasVInstructionsF16Minimal() &&
7600 !Subtarget.hasVInstructionsF16())) ||
7601 (Op.getValueType() == MVT::nxv32bf16 &&
7602 Subtarget.hasVInstructionsBF16Minimal() &&
// bf16 is promoted unless full bf16 support exists, or the opcode is in
// the ZvfbfaOps/ZvfbfaVPOps lists (presumably handled natively by Zvfbfa
// -- see those lists' definitions).
7603 (!Subtarget.hasVInstructionsBF16() ||
7604 (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
7605 !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
7606}
7607
7609 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
// Split a vector operation into two half-width operations and reassemble
// the result with CONCAT_VECTORS. Vector operands are split; non-vector
// operands are shared unchanged by both halves.
7610 SDLoc DL(Op);
7611
7612 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7613 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7614
7615 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
// Scalar operands (immediates, condition codes, etc.) go to both halves.
7616 if (!Op.getOperand(j).getValueType().isVector()) {
7617 LoOperands[j] = Op.getOperand(j);
7618 HiOperands[j] = Op.getOperand(j);
7619 continue;
7620 }
7621 std::tie(LoOperands[j], HiOperands[j]) =
7622 DAG.SplitVector(Op.getOperand(j), DL);
7623 }
7624
// Re-emit the same opcode at half width, preserving the node flags.
7625 SDValue LoRes =
7626 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7627 SDValue HiRes =
7628 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7629
7630 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7631}
7632
7634 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
// Split a vector-predicated (VP) operation into two half-width VP
// operations. Unlike the generic vector splitter, the explicit vector
// length operand is divided between the halves with SplitEVL rather than
// being vector-split.
7635 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7636 SDLoc DL(Op);
7637
7638 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7639 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7640
7641 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
// The EVL operand gets special handling: split the element count, not the
// value itself.
7642 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7643 std::tie(LoOperands[j], HiOperands[j]) =
7644 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7645 continue;
7646 }
// Non-vector operands are shared by both halves.
7647 if (!Op.getOperand(j).getValueType().isVector()) {
7648 LoOperands[j] = Op.getOperand(j);
7649 HiOperands[j] = Op.getOperand(j);
7650 continue;
7651 }
7652 std::tie(LoOperands[j], HiOperands[j]) =
7653 DAG.SplitVector(Op.getOperand(j), DL);
7654 }
7655
7656 SDValue LoRes =
7657 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7658 SDValue HiRes =
7659 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7660
7661 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7662}
7663
7665 SDLoc DL(Op);
// Split a VP reduction: reduce the low half first, then feed that scalar
// result in as the start value (operand 0) of the high-half reduction so
// the final result covers every element. Operands are
// (start, vector, mask, evl).
7666
7667 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7668 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7669 auto [EVLLo, EVLHi] =
7670 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7671
7672 SDValue ResLo =
7673 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7674 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
// The low-half result becomes the accumulator for the high half.
7675 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7676 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7677}
7678
7680
7681 assert(Op->isStrictFPOpcode());
// Split a strict-FP vector op. Each half yields (value, chain); the high
// half's incoming chain (operand 0) is rewired to the low half's output
// chain so the two halves stay ordered, and the merged result is the
// concatenated values plus the high half's final chain.
7682
7683 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7684
7685 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7686 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7687
7688 SDLoc DL(Op);
7689
7690 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7691 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7692
7693 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
// Non-vector operands (including the chain in operand 0) are initially
// shared; the high half's chain is patched below.
7694 if (!Op.getOperand(j).getValueType().isVector()) {
7695 LoOperands[j] = Op.getOperand(j);
7696 HiOperands[j] = Op.getOperand(j);
7697 continue;
7698 }
7699 std::tie(LoOperands[j], HiOperands[j]) =
7700 DAG.SplitVector(Op.getOperand(j), DL);
7701 }
7702
7703 SDValue LoRes =
7704 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
// Sequence the high half after the low half via the output chain.
7705 HiOperands[0] = LoRes.getValue(1);
7706 SDValue HiRes =
7707 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7708
7709 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7710 LoRes.getValue(0), HiRes.getValue(0));
7711 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7712}
7713
// Lower a bf16 load for XAndesBFHCvt targets without Zfh: load the value as
// a zero-extended integer, NaN-box the upper bits, then move it into a bf16
// FP register value.
7714SDValue
7715RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7716 SelectionDAG &DAG) const {
7717 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7718 "Unexpected bfloat16 load lowering");
7719
7720 SDLoc DL(Op);
7721 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7722 EVT MemVT = LD->getMemoryVT();
// Re-emit the load as an integer zero-extending load into an XLen register.
7723 SDValue Load = DAG.getExtLoad(
7724 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7725 LD->getBasePtr(),
7727 LD->getMemOperand());
7728 // Using mask to make bf16 nan-boxing valid when we don't have flh
7729 // instruction. -65536 would be treat as a small number and thus it can be
7730 // directly used lui to get the constant.
// -65536 == 0x...FFFF0000: sets every bit above the low 16, the required
// NaN-boxing pattern for a narrow FP value in a wider register.
7731 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7732 SDValue OrSixteenOne =
7733 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7734 SDValue ConvertedResult =
7735 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
// Return both the converted value and the load's chain.
7736 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7737}
7738
7739SDValue
7740RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7741 SelectionDAG &DAG) const {
7742 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7743 "Unexpected bfloat16 store lowering");
7744
7745 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7746 SDLoc DL(Op);
7747 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7748 Subtarget.getXLenVT(), ST->getValue());
7749 return DAG.getTruncStore(
7750 ST->getChain(), DL, FMV, ST->getBasePtr(),
7751 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7752 ST->getMemOperand());
7753}
7754
7756 SelectionDAG &DAG) const {
7757 switch (Op.getOpcode()) {
7758 default:
7760 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7761 case ISD::PREFETCH:
7762 return LowerPREFETCH(Op, Subtarget, DAG);
7763 case ISD::ATOMIC_FENCE:
7764 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7765 case ISD::GlobalAddress:
7766 return lowerGlobalAddress(Op, DAG);
7767 case ISD::BlockAddress:
7768 return lowerBlockAddress(Op, DAG);
7769 case ISD::ConstantPool:
7770 return lowerConstantPool(Op, DAG);
7771 case ISD::JumpTable:
7772 return lowerJumpTable(Op, DAG);
7774 return lowerGlobalTLSAddress(Op, DAG);
7775 case ISD::Constant:
7776 return lowerConstant(Op, DAG, Subtarget);
7777 case ISD::ConstantFP:
7778 return lowerConstantFP(Op, DAG);
7779 case ISD::SELECT:
7780 return lowerSELECT(Op, DAG);
7781 case ISD::BRCOND:
7782 return lowerBRCOND(Op, DAG);
7783 case ISD::VASTART:
7784 return lowerVASTART(Op, DAG);
7785 case ISD::FRAMEADDR:
7786 return lowerFRAMEADDR(Op, DAG);
7787 case ISD::RETURNADDR:
7788 return lowerRETURNADDR(Op, DAG);
7789 case ISD::SHL_PARTS:
7790 return lowerShiftLeftParts(Op, DAG);
7791 case ISD::SRA_PARTS:
7792 return lowerShiftRightParts(Op, DAG, true);
7793 case ISD::SRL_PARTS:
7794 return lowerShiftRightParts(Op, DAG, false);
7795 case ISD::ROTL:
7796 case ISD::ROTR:
7797 if (Op.getValueType().isFixedLengthVector()) {
7798 assert(Subtarget.hasStdExtZvkb());
7799 return lowerToScalableOp(Op, DAG);
7800 }
7801 assert(Subtarget.hasVendorXTHeadBb() &&
7802 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7803 "Unexpected custom legalization");
7804 // XTHeadBb only supports rotate by constant.
7805 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7806 return SDValue();
7807 return Op;
7808 case ISD::BITCAST: {
7809 SDLoc DL(Op);
7810 EVT VT = Op.getValueType();
7811 SDValue Op0 = Op.getOperand(0);
7812 EVT Op0VT = Op0.getValueType();
7813 MVT XLenVT = Subtarget.getXLenVT();
7814 if (Op0VT == MVT::i16 &&
7815 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7816 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7817 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7818 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7819 }
7820 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7821 Subtarget.hasStdExtFOrZfinx()) {
7822 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7823 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7824 }
7825 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7826 Subtarget.hasStdExtDOrZdinx()) {
7827 SDValue Lo, Hi;
7828 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7829 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7830 }
7831
7832 if (Subtarget.enablePExtSIMDCodeGen()) {
7833 bool Is32BitCast =
7834 (VT == MVT::i32 && (Op0VT == MVT::v4i8 || Op0VT == MVT::v2i16)) ||
7835 (Op0VT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
7836 bool Is64BitCast =
7837 (VT == MVT::i64 && (Op0VT == MVT::v8i8 || Op0VT == MVT::v4i16 ||
7838 Op0VT == MVT::v2i32)) ||
7839 (Op0VT == MVT::i64 &&
7840 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
7841 if (Is32BitCast || Is64BitCast)
7842 return Op;
7843 }
7844
7845 // Consider other scalar<->scalar casts as legal if the types are legal.
7846 // Otherwise expand them.
7847 if (!VT.isVector() && !Op0VT.isVector()) {
7848 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7849 return Op;
7850 return SDValue();
7851 }
7852
7853 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7854 "Unexpected types");
7855
7856 if (VT.isFixedLengthVector()) {
7857 // We can handle fixed length vector bitcasts with a simple replacement
7858 // in isel.
7859 if (Op0VT.isFixedLengthVector())
7860 return Op;
7861 // When bitcasting from scalar to fixed-length vector, insert the scalar
7862 // into a one-element vector of the result type, and perform a vector
7863 // bitcast.
7864 if (!Op0VT.isVector()) {
7865 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7866 if (!isTypeLegal(BVT))
7867 return SDValue();
7868 return DAG.getBitcast(
7869 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7870 }
7871 return SDValue();
7872 }
7873 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7874 // thus: bitcast the vector to a one-element vector type whose element type
7875 // is the same as the result type, and extract the first element.
7876 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7877 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7878 if (!isTypeLegal(BVT))
7879 return SDValue();
7880 SDValue BVec = DAG.getBitcast(BVT, Op0);
7881 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7882 }
7883 return SDValue();
7884 }
7886 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7888 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7890 return LowerINTRINSIC_VOID(Op, DAG);
7891 case ISD::IS_FPCLASS:
7892 return LowerIS_FPCLASS(Op, DAG);
7893 case ISD::BITREVERSE: {
7894 MVT VT = Op.getSimpleValueType();
7895 if (VT.isFixedLengthVector()) {
7896 assert(Subtarget.hasStdExtZvbb());
7897 return lowerToScalableOp(Op, DAG);
7898 }
7899 SDLoc DL(Op);
7900 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7901 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7902 // Expand bitreverse to a bswap(rev8) followed by brev8.
7903 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7904 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7905 }
7906 case ISD::TRUNCATE:
7909 // Only custom-lower vector truncates
7910 if (!Op.getSimpleValueType().isVector())
7911 return Op;
7912 return lowerVectorTruncLike(Op, DAG);
7913 case ISD::ANY_EXTEND:
7914 case ISD::ZERO_EXTEND:
7915 if (Op.getOperand(0).getValueType().isVector() &&
7916 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7917 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7918 if (Op.getValueType().isScalableVector())
7919 return Op;
7920 return lowerToScalableOp(Op, DAG);
7921 case ISD::SIGN_EXTEND:
7922 if (Op.getOperand(0).getValueType().isVector() &&
7923 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7924 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7925 if (Op.getValueType().isScalableVector())
7926 return Op;
7927 return lowerToScalableOp(Op, DAG);
7929 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7931 return lowerINSERT_VECTOR_ELT(Op, DAG);
7933 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7934 case ISD::SCALAR_TO_VECTOR: {
7935 MVT VT = Op.getSimpleValueType();
7936 SDLoc DL(Op);
7937 SDValue Scalar = Op.getOperand(0);
7938 if (VT.getVectorElementType() == MVT::i1) {
7939 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7940 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7941 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7942 }
7943 MVT ContainerVT = VT;
7944 if (VT.isFixedLengthVector())
7945 ContainerVT = getContainerForFixedLengthVector(VT);
7946 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7947
7948 SDValue V;
7949 if (VT.isFloatingPoint()) {
7950 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7951 DAG.getUNDEF(ContainerVT), Scalar, VL);
7952 } else {
7953 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7954 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7955 DAG.getUNDEF(ContainerVT), Scalar, VL);
7956 }
7957 if (VT.isFixedLengthVector())
7958 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7959 return V;
7960 }
7961 case ISD::VSCALE: {
7962 MVT XLenVT = Subtarget.getXLenVT();
7963 MVT VT = Op.getSimpleValueType();
7964 SDLoc DL(Op);
7965 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7966 // We define our scalable vector types for lmul=1 to use a 64 bit known
7967 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7968 // vscale as VLENB / 8.
7969 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7970 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7971 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7972 // We assume VLENB is a multiple of 8. We manually choose the best shift
7973 // here because SimplifyDemandedBits isn't always able to simplify it.
7974 uint64_t Val = Op.getConstantOperandVal(0);
7975 if (isPowerOf2_64(Val)) {
7976 uint64_t Log2 = Log2_64(Val);
7977 if (Log2 < 3) {
7978 SDNodeFlags Flags;
7979 Flags.setExact(true);
7980 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7981 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7982 } else if (Log2 > 3) {
7983 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7984 DAG.getConstant(Log2 - 3, DL, XLenVT));
7985 }
7986 } else if ((Val % 8) == 0) {
7987 // If the multiplier is a multiple of 8, scale it down to avoid needing
7988 // to shift the VLENB value.
7989 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7990 DAG.getConstant(Val / 8, DL, XLenVT));
7991 } else {
7992 SDNodeFlags Flags;
7993 Flags.setExact(true);
7994 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7995 DAG.getConstant(3, DL, XLenVT), Flags);
7996 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7997 DAG.getConstant(Val, DL, XLenVT));
7998 }
7999 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
8000 }
8001 case ISD::FPOWI: {
8002 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
8003 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
8004 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
8005 Op.getOperand(1).getValueType() == MVT::i32) {
8006 SDLoc DL(Op);
8007 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
8008 SDValue Powi =
8009 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
8010 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
8011 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8012 }
8013 return SDValue();
8014 }
8015 case ISD::FMAXIMUM:
8016 case ISD::FMINIMUM:
8017 if (isPromotedOpNeedingSplit(Op, Subtarget))
8018 return SplitVectorOp(Op, DAG);
8019 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8020 case ISD::FP_EXTEND:
8021 case ISD::FP_ROUND:
8022 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8025 return lowerStrictFPExtendOrRoundLike(Op, DAG);
8026 case ISD::SINT_TO_FP:
8027 case ISD::UINT_TO_FP:
8028 if (Op.getValueType().isVector() &&
8029 ((Op.getValueType().getScalarType() == MVT::f16 &&
8030 (Subtarget.hasVInstructionsF16Minimal() &&
8031 !Subtarget.hasVInstructionsF16())) ||
8032 Op.getValueType().getScalarType() == MVT::bf16)) {
8033 if (isPromotedOpNeedingSplit(Op, Subtarget))
8034 return SplitVectorOp(Op, DAG);
8035 // int -> f32
8036 SDLoc DL(Op);
8037 MVT NVT =
8038 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8039 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8040 // f32 -> [b]f16
8041 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8042 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8043 }
8044 [[fallthrough]];
8045 case ISD::FP_TO_SINT:
8046 case ISD::FP_TO_UINT:
8047 if (SDValue Op1 = Op.getOperand(0);
8048 Op1.getValueType().isVector() &&
8049 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8050 (Subtarget.hasVInstructionsF16Minimal() &&
8051 !Subtarget.hasVInstructionsF16())) ||
8052 Op1.getValueType().getScalarType() == MVT::bf16)) {
8053 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8054 return SplitVectorOp(Op, DAG);
8055 // [b]f16 -> f32
8056 SDLoc DL(Op);
8057 MVT NVT = MVT::getVectorVT(MVT::f32,
8058 Op1.getValueType().getVectorElementCount());
8059 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8060 // f32 -> int
8061 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
8062 }
8063 [[fallthrough]];
8068 // RVV can only do fp<->int conversions to types half/double the size as
8069 // the source. We custom-lower any conversions that do two hops into
8070 // sequences.
8071 MVT VT = Op.getSimpleValueType();
8072 if (VT.isScalarInteger())
8073 return lowerFP_TO_INT(Op, DAG, Subtarget);
8074 bool IsStrict = Op->isStrictFPOpcode();
8075 SDValue Src = Op.getOperand(0 + IsStrict);
8076 MVT SrcVT = Src.getSimpleValueType();
8077 if (SrcVT.isScalarInteger())
8078 return lowerINT_TO_FP(Op, DAG, Subtarget);
8079 if (!VT.isVector())
8080 return Op;
8081 SDLoc DL(Op);
8082 MVT EltVT = VT.getVectorElementType();
8083 MVT SrcEltVT = SrcVT.getVectorElementType();
8084 unsigned EltSize = EltVT.getSizeInBits();
8085 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
8086 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
8087 "Unexpected vector element types");
8088
8089 bool IsInt2FP = SrcEltVT.isInteger();
8090 // Widening conversions
8091 if (EltSize > (2 * SrcEltSize)) {
8092 if (IsInt2FP) {
8093 // Do a regular integer sign/zero extension then convert to float.
8094 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
8096 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
8097 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
8100 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
8101 if (IsStrict)
8102 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
8103 Op.getOperand(0), Ext);
8104 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
8105 }
8106 // FP2Int
8107 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
8108 // Do one doubling fp_extend then complete the operation by converting
8109 // to int.
8110 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
8111 if (IsStrict) {
8112 auto [FExt, Chain] =
8113 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
8114 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
8115 }
8116 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
8117 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
8118 }
8119
8120 // Narrowing conversions
8121 if (SrcEltSize > (2 * EltSize)) {
8122 if (IsInt2FP) {
8123 // One narrowing int_to_fp, then an fp_round.
8124 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
8125 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
8126 if (IsStrict) {
8127 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
8128 DAG.getVTList(InterimFVT, MVT::Other),
8129 Op.getOperand(0), Src);
8130 SDValue Chain = Int2FP.getValue(1);
8131 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
8132 }
8133 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
8134 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
8135 }
8136 // FP2Int
8137 // One narrowing fp_to_int, then truncate the integer. If the float isn't
8138 // representable by the integer, the result is poison.
8139 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
8141 if (IsStrict) {
8142 SDValue FP2Int =
8143 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
8144 Op.getOperand(0), Src);
8145 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
8146 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
8147 }
8148 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
8149 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
8150 }
8151
8152 // Scalable vectors can exit here. Patterns will handle equally-sized
8153 // conversions halving/doubling ones.
8154 if (!VT.isFixedLengthVector())
8155 return Op;
8156
8157 // For fixed-length vectors we lower to a custom "VL" node.
8158 unsigned RVVOpc = 0;
8159 switch (Op.getOpcode()) {
8160 default:
8161 llvm_unreachable("Impossible opcode");
8162 case ISD::FP_TO_SINT:
8163 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
8164 break;
8165 case ISD::FP_TO_UINT:
8166 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
8167 break;
8168 case ISD::SINT_TO_FP:
8169 RVVOpc = RISCVISD::SINT_TO_FP_VL;
8170 break;
8171 case ISD::UINT_TO_FP:
8172 RVVOpc = RISCVISD::UINT_TO_FP_VL;
8173 break;
8175 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
8176 break;
8178 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
8179 break;
8181 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
8182 break;
8184 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
8185 break;
8186 }
8187
8188 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8189 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8190 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
8191 "Expected same element count");
8192
8193 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8194
8195 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8196 if (IsStrict) {
8197 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8198 Op.getOperand(0), Src, Mask, VL);
8199 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
8200 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
8201 }
8202 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
8203 return convertFromScalableVector(VT, Src, DAG, Subtarget);
8204 }
8207 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
8208 case ISD::FP_TO_BF16: {
8209 // Custom lower to ensure the libcall return is passed in an FPR on hard
8210 // float ABIs.
8211 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
8212 SDLoc DL(Op);
8213 MakeLibCallOptions CallOptions;
8214 RTLIB::Libcall LC =
8215 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
8216 SDValue Res =
8217 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
8218 if (Subtarget.is64Bit())
8219 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
8220 return DAG.getBitcast(MVT::i32, Res);
8221 }
8222 case ISD::BF16_TO_FP: {
8223 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
8224 MVT VT = Op.getSimpleValueType();
8225 SDLoc DL(Op);
8226 Op = DAG.getNode(
8227 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
8228 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
8229 SDValue Res = Subtarget.is64Bit()
8230 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
8231 : DAG.getBitcast(MVT::f32, Op);
8232 // fp_extend if the target VT is bigger than f32.
8233 if (VT != MVT::f32)
8234 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
8235 return Res;
8236 }
8238 case ISD::FP_TO_FP16: {
8239 // Custom lower to ensure the libcall return is passed in an FPR on hard
8240 // float ABIs.
8241 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
8242 SDLoc DL(Op);
8243 MakeLibCallOptions CallOptions;
8244 bool IsStrict = Op->isStrictFPOpcode();
8245 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
8246 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8247 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
8248 SDValue Res;
8249 std::tie(Res, Chain) =
8250 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
8251 if (Subtarget.is64Bit())
8252 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
8253 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
8254 if (IsStrict)
8255 return DAG.getMergeValues({Result, Chain}, DL);
8256 return Result;
8257 }
8259 case ISD::FP16_TO_FP: {
8260 // Custom lower to ensure the libcall argument is passed in an FPR on hard
8261 // float ABIs.
8262 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
8263 SDLoc DL(Op);
8264 MakeLibCallOptions CallOptions;
8265 bool IsStrict = Op->isStrictFPOpcode();
8266 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
8267 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8268 SDValue Arg = Subtarget.is64Bit()
8269 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
8270 : DAG.getBitcast(MVT::f32, Op0);
8271 SDValue Res;
8272 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
8273 CallOptions, DL, Chain);
8274 if (IsStrict)
8275 return DAG.getMergeValues({Res, Chain}, DL);
8276 return Res;
8277 }
8278 case ISD::FTRUNC:
8279 case ISD::FCEIL:
8280 case ISD::FFLOOR:
8281 case ISD::FNEARBYINT:
8282 case ISD::FRINT:
8283 case ISD::FROUND:
8284 case ISD::FROUNDEVEN:
8285 if (isPromotedOpNeedingSplit(Op, Subtarget))
8286 return SplitVectorOp(Op, DAG);
8287 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8288 case ISD::LRINT:
8289 case ISD::LLRINT:
8290 case ISD::LROUND:
8291 case ISD::LLROUND: {
8292 if (Op.getValueType().isVector())
8293 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
8294 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
8295 "Unexpected custom legalisation");
8296 SDLoc DL(Op);
8297 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
8298 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
8299 }
8300 case ISD::STRICT_LRINT:
8301 case ISD::STRICT_LLRINT:
8302 case ISD::STRICT_LROUND:
8303 case ISD::STRICT_LLROUND: {
8304 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
8305 "Unexpected custom legalisation");
8306 SDLoc DL(Op);
8307 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
8308 {Op.getOperand(0), Op.getOperand(1)});
8309 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
8310 {Ext.getValue(1), Ext.getValue(0)});
8311 }
8312 case ISD::VECREDUCE_ADD:
8317 return lowerVECREDUCE(Op, DAG);
8318 case ISD::VECREDUCE_AND:
8319 case ISD::VECREDUCE_OR:
8320 case ISD::VECREDUCE_XOR:
8321 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8322 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
8323 return lowerVECREDUCE(Op, DAG);
8330 return lowerFPVECREDUCE(Op, DAG);
8331 case ISD::VP_REDUCE_ADD:
8332 case ISD::VP_REDUCE_UMAX:
8333 case ISD::VP_REDUCE_SMAX:
8334 case ISD::VP_REDUCE_UMIN:
8335 case ISD::VP_REDUCE_SMIN:
8336 case ISD::VP_REDUCE_FADD:
8337 case ISD::VP_REDUCE_SEQ_FADD:
8338 case ISD::VP_REDUCE_FMIN:
8339 case ISD::VP_REDUCE_FMAX:
8340 case ISD::VP_REDUCE_FMINIMUM:
8341 case ISD::VP_REDUCE_FMAXIMUM:
8342 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
8343 return SplitVectorReductionOp(Op, DAG);
8344 return lowerVPREDUCE(Op, DAG);
8345 case ISD::VP_REDUCE_AND:
8346 case ISD::VP_REDUCE_OR:
8347 case ISD::VP_REDUCE_XOR:
8348 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
8349 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
8350 return lowerVPREDUCE(Op, DAG);
8351 case ISD::VP_CTTZ_ELTS:
8352 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
8353 return lowerVPCttzElements(Op, DAG);
8354 case ISD::UNDEF: {
8355 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
8356 return convertFromScalableVector(Op.getSimpleValueType(),
8357 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
8358 }
8360 return lowerINSERT_SUBVECTOR(Op, DAG);
8362 return lowerEXTRACT_SUBVECTOR(Op, DAG);
8364 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
8366 return lowerVECTOR_INTERLEAVE(Op, DAG);
8367 case ISD::STEP_VECTOR:
8368 return lowerSTEP_VECTOR(Op, DAG);
8370 return lowerVECTOR_REVERSE(Op, DAG);
8373 return lowerVECTOR_SPLICE(Op, DAG);
8374 case ISD::BUILD_VECTOR: {
8375 MVT VT = Op.getSimpleValueType();
8376 MVT EltVT = VT.getVectorElementType();
8377 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
8378 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
8379 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
8380 }
8381 case ISD::SPLAT_VECTOR: {
8382 MVT VT = Op.getSimpleValueType();
8383 MVT EltVT = VT.getVectorElementType();
8384 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
8385 EltVT == MVT::bf16) {
8386 SDLoc DL(Op);
8387 SDValue Elt;
8388 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8389 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8390 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8391 Op.getOperand(0));
8392 else
8393 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8394 MVT IVT = VT.changeVectorElementType(MVT::i16);
8395 return DAG.getNode(ISD::BITCAST, DL, VT,
8396 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8397 }
8398
8399 if (EltVT == MVT::i1)
8400 return lowerVectorMaskSplat(Op, DAG);
8401 return SDValue();
8402 }
8404 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8405 case ISD::CONCAT_VECTORS: {
8406 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8407 // better than going through the stack, as the default expansion does.
8408 SDLoc DL(Op);
8409 MVT VT = Op.getSimpleValueType();
8410 MVT ContainerVT = VT;
8411 if (VT.isFixedLengthVector())
8412 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8413
8414 // Recursively split concat_vectors with more than 2 operands:
8415 //
8416 // concat_vector op1, op2, op3, op4
8417 // ->
8418 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8419 //
8420 // This reduces the length of the chain of vslideups and allows us to
8421 // perform the vslideups at a smaller LMUL, limited to MF2.
8422 if (Op.getNumOperands() > 2 &&
8423 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8424 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8425 assert(isPowerOf2_32(Op.getNumOperands()));
8426 size_t HalfNumOps = Op.getNumOperands() / 2;
8427 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8428 Op->ops().take_front(HalfNumOps));
8429 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8430 Op->ops().drop_front(HalfNumOps));
8431 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8432 }
8433
8434 unsigned NumOpElts =
8435 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8436 SDValue Vec = DAG.getUNDEF(VT);
8437 for (const auto &OpIdx : enumerate(Op->ops())) {
8438 SDValue SubVec = OpIdx.value();
8439 // Don't insert undef subvectors.
8440 if (SubVec.isUndef())
8441 continue;
8442 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8443 }
8444 return Vec;
8445 }
8446 case ISD::LOAD: {
8447 auto *Load = cast<LoadSDNode>(Op);
8448 EVT VT = Load->getValueType(0);
8449 if (VT == MVT::f64) {
8450 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8451 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8452
8453 // Replace a double precision load with two i32 loads and a BuildPairF64.
8454 SDLoc DL(Op);
8455 SDValue BasePtr = Load->getBasePtr();
8456 SDValue Chain = Load->getChain();
8457
8458 SDValue Lo =
8459 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8460 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8461 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8462 SDValue Hi = DAG.getLoad(
8463 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8464 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8465 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8466 Hi.getValue(1));
8467
8468 // For big-endian, swap the order of Lo and Hi.
8469 if (!Subtarget.isLittleEndian())
8470 std::swap(Lo, Hi);
8471
8472 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8473 return DAG.getMergeValues({Pair, Chain}, DL);
8474 }
8475
8476 if (VT == MVT::bf16)
8477 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8478
8479 // Handle normal vector tuple load.
8480 if (VT.isRISCVVectorTuple()) {
8481 SDLoc DL(Op);
8482 MVT XLenVT = Subtarget.getXLenVT();
8483 unsigned NF = VT.getRISCVVectorTupleNumFields();
8484 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8485 unsigned NumElts = Sz / (NF * 8);
8486 int Log2LMUL = Log2_64(NumElts) - 3;
8487
8488 auto Flag = SDNodeFlags();
8489 Flag.setNoUnsignedWrap(true);
8490 SDValue Ret = DAG.getUNDEF(VT);
8491 SDValue BasePtr = Load->getBasePtr();
8492 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8493 VROffset =
8494 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8495 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8496 SmallVector<SDValue, 8> OutChains;
8497
8498 // Load NF vector registers and combine them to a vector tuple.
8499 for (unsigned i = 0; i < NF; ++i) {
8500 SDValue LoadVal = DAG.getLoad(
8501 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8502 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8503 OutChains.push_back(LoadVal.getValue(1));
8504 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8505 DAG.getTargetConstant(i, DL, MVT::i32));
8506 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8507 }
8508 return DAG.getMergeValues(
8509 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8510 }
8511
8512 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8513 return V;
8514 if (Op.getValueType().isFixedLengthVector())
8515 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8516 return Op;
8517 }
8518 case ISD::STORE: {
8519 auto *Store = cast<StoreSDNode>(Op);
8520 SDValue StoredVal = Store->getValue();
8521 EVT VT = StoredVal.getValueType();
8522 if (Subtarget.enablePExtSIMDCodeGen()) {
8523 if (VT == MVT::v2i16 || VT == MVT::v4i8) {
8524 SDValue DL(Op);
8525 SDValue Cast = DAG.getBitcast(MVT::i32, StoredVal);
8526 SDValue NewStore =
8527 DAG.getStore(Store->getChain(), DL, Cast, Store->getBasePtr(),
8528 Store->getPointerInfo(), Store->getBaseAlign(),
8529 Store->getMemOperand()->getFlags());
8530 return NewStore;
8531 }
8532 }
8533 if (VT == MVT::f64) {
8534 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8535 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8536
8537 // Replace a double precision store with a SplitF64 and i32 stores.
8538 SDValue DL(Op);
8539 SDValue BasePtr = Store->getBasePtr();
8540 SDValue Chain = Store->getChain();
8541 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8542 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8543
8544 SDValue Lo = Split.getValue(0);
8545 SDValue Hi = Split.getValue(1);
8546
8547 // For big-endian, swap the order of Lo and Hi before storing.
8548 if (!Subtarget.isLittleEndian())
8549 std::swap(Lo, Hi);
8550
8551 SDValue LoStore = DAG.getStore(
8552 Chain, DL, Lo, BasePtr, Store->getPointerInfo(),
8553 Store->getBaseAlign(), Store->getMemOperand()->getFlags());
8554 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8555 SDValue HiStore = DAG.getStore(
8556 Chain, DL, Hi, BasePtr, Store->getPointerInfo().getWithOffset(4),
8557 Store->getBaseAlign(), Store->getMemOperand()->getFlags());
8558 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);
8559 }
8560 if (VT == MVT::i64) {
8561 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8562 "Unexpected custom legalisation");
8563 if (Store->isTruncatingStore())
8564 return SDValue();
8565
8566 if (Store->getAlign() < Subtarget.getZilsdAlign())
8567 return SDValue();
8568
8569 SDLoc DL(Op);
8570 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8571 DAG.getTargetConstant(0, DL, MVT::i32));
8572 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8573 DAG.getTargetConstant(1, DL, MVT::i32));
8574
8575 return DAG.getMemIntrinsicNode(
8576 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8577 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8578 Store->getMemOperand());
8579 }
8580
8581 if (VT == MVT::bf16)
8582 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8583
8584 // Handle normal vector tuple store.
8585 if (VT.isRISCVVectorTuple()) {
8586 SDLoc DL(Op);
8587 MVT XLenVT = Subtarget.getXLenVT();
8588 unsigned NF = VT.getRISCVVectorTupleNumFields();
8589 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8590 unsigned NumElts = Sz / (NF * 8);
8591 int Log2LMUL = Log2_64(NumElts) - 3;
8592
8593 auto Flag = SDNodeFlags();
8594 Flag.setNoUnsignedWrap(true);
8595 SDValue Ret;
8596 SDValue Chain = Store->getChain();
8597 SDValue BasePtr = Store->getBasePtr();
8598 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8599 VROffset =
8600 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8601 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8602
8603 // Extract subregisters in a vector tuple and store them individually.
8604 for (unsigned i = 0; i < NF; ++i) {
8605 auto Extract =
8606 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8607 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8608 DAG.getTargetConstant(i, DL, MVT::i32));
8609 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8610 MachinePointerInfo(Store->getAddressSpace()),
8611 Store->getBaseAlign(),
8612 Store->getMemOperand()->getFlags());
8613 Chain = Ret.getValue(0);
8614 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8615 }
8616 return Ret;
8617 }
8618
8619 if (auto V = expandUnalignedRVVStore(Op, DAG))
8620 return V;
8621 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8622 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8623 return Op;
8624 }
8625 case ISD::VP_LOAD:
8626 if (SDValue V = expandUnalignedVPLoad(Op, DAG))
8627 return V;
8628 [[fallthrough]];
8629 case ISD::MLOAD:
8630 return lowerMaskedLoad(Op, DAG);
8631 case ISD::VP_LOAD_FF:
8632 return lowerLoadFF(Op, DAG);
8633 case ISD::VP_STORE:
8634 if (SDValue V = expandUnalignedVPStore(Op, DAG))
8635 return V;
8636 [[fallthrough]];
8637 case ISD::MSTORE:
8638 return lowerMaskedStore(Op, DAG);
8640 return lowerVectorCompress(Op, DAG);
8641 case ISD::SELECT_CC: {
8642 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8643 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8644 // into separate SETCC+SELECT just like LegalizeDAG.
8645 SDValue Tmp1 = Op.getOperand(0);
8646 SDValue Tmp2 = Op.getOperand(1);
8647 SDValue True = Op.getOperand(2);
8648 SDValue False = Op.getOperand(3);
8649 EVT VT = Op.getValueType();
8650 SDValue CC = Op.getOperand(4);
8651 EVT CmpVT = Tmp1.getValueType();
8652 EVT CCVT =
8653 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8654 SDLoc DL(Op);
8655 SDValue Cond =
8656 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8657 return DAG.getSelect(DL, VT, Cond, True, False);
8658 }
8659 case ISD::SETCC: {
8660 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8661 if (OpVT.isScalarInteger()) {
8662 MVT VT = Op.getSimpleValueType();
8663 SDValue LHS = Op.getOperand(0);
8664 SDValue RHS = Op.getOperand(1);
8665 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8666 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8667 "Unexpected CondCode");
8668
8669 SDLoc DL(Op);
8670
8671 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8672 // convert this to the equivalent of (set(u)ge X, C+1) by using
8673 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8674 // in a register.
8675 if (isa<ConstantSDNode>(RHS)) {
8676 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8677 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8678 // If this is an unsigned compare and the constant is -1, incrementing
8679 // the constant would change behavior. The result should be false.
8680 if (CCVal == ISD::SETUGT && Imm == -1)
8681 return DAG.getConstant(0, DL, VT);
8682 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8683 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8684 SDValue SetCC = DAG.getSetCC(
8685 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8686 return DAG.getLogicalNOT(DL, SetCC, VT);
8687 }
8688 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8689 if (CCVal == ISD::SETUGT && Imm == 2047) {
8690 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8691 DAG.getShiftAmountConstant(11, OpVT, DL));
8692 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8693 ISD::SETNE);
8694 }
8695 }
8696
8697 // Not a constant we could handle, swap the operands and condition code to
8698 // SETLT/SETULT.
8699 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8700 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8701 }
8702
8703 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8704 return SplitVectorOp(Op, DAG);
8705
8706 return lowerToScalableOp(Op, DAG);
8707 }
8708 case ISD::ADD:
8709 case ISD::SUB:
8710 case ISD::MUL:
8711 case ISD::MULHS:
8712 case ISD::MULHU:
8713 case ISD::AND:
8714 case ISD::OR:
8715 case ISD::XOR:
8716 case ISD::SDIV:
8717 case ISD::SREM:
8718 case ISD::UDIV:
8719 case ISD::UREM:
8720 case ISD::BSWAP:
8721 case ISD::CTPOP:
8722 case ISD::VSELECT:
8723 return lowerToScalableOp(Op, DAG);
8724 case ISD::SHL:
8725 case ISD::SRL:
8726 case ISD::SRA:
8727 if (Op.getSimpleValueType().isFixedLengthVector()) {
8728 if (Subtarget.enablePExtSIMDCodeGen()) {
8729 // We have patterns for scalar/immediate shift amount, so no lowering
8730 // needed.
8731 if (Op.getOperand(1)->getOpcode() == ISD::SPLAT_VECTOR)
8732 return Op;
8733
8734 // There's no vector-vector version of shift instruction in P extension
8735 // so we need to unroll to scalar computation and pack them back.
8736 return DAG.UnrollVectorOp(Op.getNode());
8737 }
8738 return lowerToScalableOp(Op, DAG);
8739 }
8740 // This can be called for an i32 shift amount that needs to be promoted.
8741 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8742 "Unexpected custom legalisation");
8743 return SDValue();
8744 case ISD::FABS:
8745 case ISD::FNEG:
8746 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8747 return lowerFABSorFNEG(Op, DAG, Subtarget);
8748 [[fallthrough]];
8749 case ISD::FADD:
8750 case ISD::FSUB:
8751 case ISD::FMUL:
8752 case ISD::FDIV:
8753 case ISD::FSQRT:
8754 case ISD::FMA:
8755 case ISD::FMINNUM:
8756 case ISD::FMAXNUM:
8757 case ISD::FMINIMUMNUM:
8758 case ISD::FMAXIMUMNUM:
8759 if (isPromotedOpNeedingSplit(Op, Subtarget))
8760 return SplitVectorOp(Op, DAG);
8761 [[fallthrough]];
8762 case ISD::AVGFLOORS:
8763 case ISD::AVGFLOORU:
8764 case ISD::AVGCEILS:
8765 case ISD::AVGCEILU:
8766 case ISD::SMIN:
8767 case ISD::SMAX:
8768 case ISD::UMIN:
8769 case ISD::UMAX:
8770 case ISD::UADDSAT:
8771 case ISD::USUBSAT:
8772 case ISD::SADDSAT:
8773 case ISD::SSUBSAT:
8774 return lowerToScalableOp(Op, DAG);
8775 case ISD::ABDS:
8776 case ISD::ABDU: {
8777 SDLoc dl(Op);
8778 EVT VT = Op->getValueType(0);
8779 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8780 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8781 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8782
8783 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8784 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8785 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8786 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8787 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8788 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8789 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8790 }
8791 case ISD::ABS:
8792 case ISD::VP_ABS:
8793 return lowerABS(Op, DAG);
8794 case ISD::CTLZ:
8796 case ISD::CTTZ:
8798 if (Subtarget.hasStdExtZvbb())
8799 return lowerToScalableOp(Op, DAG);
8800 assert(Op.getOpcode() != ISD::CTTZ);
8801 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8802 case ISD::FCOPYSIGN:
8803 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8804 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8805 if (isPromotedOpNeedingSplit(Op, Subtarget))
8806 return SplitVectorOp(Op, DAG);
8807 return lowerToScalableOp(Op, DAG);
8808 case ISD::STRICT_FADD:
8809 case ISD::STRICT_FSUB:
8810 case ISD::STRICT_FMUL:
8811 case ISD::STRICT_FDIV:
8812 case ISD::STRICT_FSQRT:
8813 case ISD::STRICT_FMA:
8814 if (isPromotedOpNeedingSplit(Op, Subtarget))
8815 return SplitStrictFPVectorOp(Op, DAG);
8816 return lowerToScalableOp(Op, DAG);
8817 case ISD::STRICT_FSETCC:
8819 return lowerVectorStrictFSetcc(Op, DAG);
8820 case ISD::STRICT_FCEIL:
8821 case ISD::STRICT_FRINT:
8822 case ISD::STRICT_FFLOOR:
8823 case ISD::STRICT_FTRUNC:
8825 case ISD::STRICT_FROUND:
8827 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8828 case ISD::MGATHER:
8829 case ISD::VP_GATHER:
8830 return lowerMaskedGather(Op, DAG);
8831 case ISD::MSCATTER:
8832 case ISD::VP_SCATTER:
8833 return lowerMaskedScatter(Op, DAG);
8834 case ISD::GET_ROUNDING:
8835 return lowerGET_ROUNDING(Op, DAG);
8836 case ISD::SET_ROUNDING:
8837 return lowerSET_ROUNDING(Op, DAG);
8838 case ISD::GET_FPENV:
8839 return lowerGET_FPENV(Op, DAG);
8840 case ISD::SET_FPENV:
8841 return lowerSET_FPENV(Op, DAG);
8842 case ISD::RESET_FPENV:
8843 return lowerRESET_FPENV(Op, DAG);
8844 case ISD::GET_FPMODE:
8845 return lowerGET_FPMODE(Op, DAG);
8846 case ISD::SET_FPMODE:
8847 return lowerSET_FPMODE(Op, DAG);
8848 case ISD::RESET_FPMODE:
8849 return lowerRESET_FPMODE(Op, DAG);
8850 case ISD::EH_DWARF_CFA:
8851 return lowerEH_DWARF_CFA(Op, DAG);
8852 case ISD::VP_MERGE:
8853 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8854 return lowerVPMergeMask(Op, DAG);
8855 [[fallthrough]];
8856 case ISD::VP_SELECT:
8857 case ISD::VP_ADD:
8858 case ISD::VP_SUB:
8859 case ISD::VP_MUL:
8860 case ISD::VP_SDIV:
8861 case ISD::VP_UDIV:
8862 case ISD::VP_SREM:
8863 case ISD::VP_UREM:
8864 case ISD::VP_UADDSAT:
8865 case ISD::VP_USUBSAT:
8866 case ISD::VP_SADDSAT:
8867 case ISD::VP_SSUBSAT:
8868 case ISD::VP_LRINT:
8869 case ISD::VP_LLRINT:
8870 return lowerVPOp(Op, DAG);
8871 case ISD::VP_AND:
8872 case ISD::VP_OR:
8873 case ISD::VP_XOR:
8874 return lowerLogicVPOp(Op, DAG);
8875 case ISD::VP_FADD:
8876 case ISD::VP_FSUB:
8877 case ISD::VP_FMUL:
8878 case ISD::VP_FDIV:
8879 case ISD::VP_FNEG:
8880 case ISD::VP_FABS:
8881 case ISD::VP_SQRT:
8882 case ISD::VP_FMA:
8883 case ISD::VP_FMINNUM:
8884 case ISD::VP_FMAXNUM:
8885 case ISD::VP_FCOPYSIGN:
8886 if (isPromotedOpNeedingSplit(Op, Subtarget))
8887 return SplitVPOp(Op, DAG);
8888 [[fallthrough]];
8889 case ISD::VP_SRA:
8890 case ISD::VP_SRL:
8891 case ISD::VP_SHL:
8892 return lowerVPOp(Op, DAG);
8893 case ISD::VP_IS_FPCLASS:
8894 return LowerIS_FPCLASS(Op, DAG);
8895 case ISD::VP_SIGN_EXTEND:
8896 case ISD::VP_ZERO_EXTEND:
8897 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8898 return lowerVPExtMaskOp(Op, DAG);
8899 return lowerVPOp(Op, DAG);
8900 case ISD::VP_TRUNCATE:
8901 return lowerVectorTruncLike(Op, DAG);
8902 case ISD::VP_FP_EXTEND:
8903 case ISD::VP_FP_ROUND:
8904 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8905 case ISD::VP_SINT_TO_FP:
8906 case ISD::VP_UINT_TO_FP:
8907 if (Op.getValueType().isVector() &&
8908 ((Op.getValueType().getScalarType() == MVT::f16 &&
8909 (Subtarget.hasVInstructionsF16Minimal() &&
8910 !Subtarget.hasVInstructionsF16())) ||
8911 Op.getValueType().getScalarType() == MVT::bf16)) {
8912 if (isPromotedOpNeedingSplit(Op, Subtarget))
8913 return SplitVectorOp(Op, DAG);
8914 // int -> f32
8915 SDLoc DL(Op);
8916 MVT NVT =
8917 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8918 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8919 // f32 -> [b]f16
8920 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8921 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8922 }
8923 [[fallthrough]];
8924 case ISD::VP_FP_TO_SINT:
8925 case ISD::VP_FP_TO_UINT:
8926 if (SDValue Op1 = Op.getOperand(0);
8927 Op1.getValueType().isVector() &&
8928 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8929 (Subtarget.hasVInstructionsF16Minimal() &&
8930 !Subtarget.hasVInstructionsF16())) ||
8931 Op1.getValueType().getScalarType() == MVT::bf16)) {
8932 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8933 return SplitVectorOp(Op, DAG);
8934 // [b]f16 -> f32
8935 SDLoc DL(Op);
8936 MVT NVT = MVT::getVectorVT(MVT::f32,
8937 Op1.getValueType().getVectorElementCount());
8938 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8939 // f32 -> int
8940 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8941 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8942 }
8943 return lowerVPFPIntConvOp(Op, DAG);
8944 case ISD::VP_SETCC:
8945 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8946 return SplitVPOp(Op, DAG);
8947 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8948 return lowerVPSetCCMaskOp(Op, DAG);
8949 [[fallthrough]];
8950 case ISD::VP_SMIN:
8951 case ISD::VP_SMAX:
8952 case ISD::VP_UMIN:
8953 case ISD::VP_UMAX:
8954 case ISD::VP_BITREVERSE:
8955 case ISD::VP_BSWAP:
8956 return lowerVPOp(Op, DAG);
8957 case ISD::VP_CTLZ:
8958 case ISD::VP_CTLZ_ZERO_UNDEF:
8959 if (Subtarget.hasStdExtZvbb())
8960 return lowerVPOp(Op, DAG);
8961 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8962 case ISD::VP_CTTZ:
8963 case ISD::VP_CTTZ_ZERO_UNDEF:
8964 if (Subtarget.hasStdExtZvbb())
8965 return lowerVPOp(Op, DAG);
8966 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8967 case ISD::VP_CTPOP:
8968 return lowerVPOp(Op, DAG);
8969 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8970 return lowerVPStridedLoad(Op, DAG);
8971 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8972 return lowerVPStridedStore(Op, DAG);
8973 case ISD::VP_FCEIL:
8974 case ISD::VP_FFLOOR:
8975 case ISD::VP_FRINT:
8976 case ISD::VP_FNEARBYINT:
8977 case ISD::VP_FROUND:
8978 case ISD::VP_FROUNDEVEN:
8979 case ISD::VP_FROUNDTOZERO:
8980 if (isPromotedOpNeedingSplit(Op, Subtarget))
8981 return SplitVPOp(Op, DAG);
8982 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8983 case ISD::VP_FMAXIMUM:
8984 case ISD::VP_FMINIMUM:
8985 if (isPromotedOpNeedingSplit(Op, Subtarget))
8986 return SplitVPOp(Op, DAG);
8987 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8988 case ISD::EXPERIMENTAL_VP_SPLICE:
8989 return lowerVPSpliceExperimental(Op, DAG);
8990 case ISD::EXPERIMENTAL_VP_REVERSE:
8991 return lowerVPReverseExperimental(Op, DAG);
8992 case ISD::CLEAR_CACHE: {
8993 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8994 "llvm.clear_cache only needs custom lower on Linux targets");
8995 SDLoc DL(Op);
8996 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8997 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8998 Op.getOperand(2), Flags, DL);
8999 }
9001 return lowerDYNAMIC_STACKALLOC(Op, DAG);
9003 return lowerINIT_TRAMPOLINE(Op, DAG);
9005 return lowerADJUST_TRAMPOLINE(Op, DAG);
9009 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
9010 }
9011}
9012
9013SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
9014 SDValue Start, SDValue End,
9015 SDValue Flags, SDLoc DL) const {
9016 MakeLibCallOptions CallOptions;
9017 std::pair<SDValue, SDValue> CallResult =
9018 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
9019 {Start, End, Flags}, CallOptions, DL, InChain);
9020
9021 // This function returns void so only the out chain matters.
9022 return CallResult.second;
9023}
9024
/// Lower ISD::INIT_TRAMPOLINE: write a short sequence of pre-encoded RISC-V
/// instructions plus the static chain value and the target function address
/// into the caller-supplied trampoline buffer, then emit ISD::CLEAR_CACHE
/// over the instruction bytes so the icache sees the stores. RV64 only.
/// Register mapping used below: X5 = t0, X7 = t2, X28 = t3.
9025 SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
9026 SelectionDAG &DAG) const {
9027 if (!Subtarget.is64Bit())
9028 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
9029
9030 // Create an MCCodeEmitter to encode instructions.
9031 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
9032 assert(TLO);
9033 MCContext &MCCtx = TLO->getContext();
9034
9035 std::unique_ptr<MCCodeEmitter> CodeEmitter(
9036 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
9037
// Operands of INIT_TRAMPOLINE: 0 = chain, 1 = trampoline buffer address,
// 2 = function address, 3 = static chain, 4 = source Value of the buffer.
9038 SDValue Root = Op.getOperand(0);
9039 SDValue Trmp = Op.getOperand(1); // trampoline
9040 SDLoc dl(Op);
9041
9042 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
9043
9044 // We store in the trampoline buffer the following instructions and data.
9045 // Offset:
9046 // 0: auipc t2, 0
9047 // 4: ld t0, 24(t2)
9048 // 8: ld t2, 16(t2)
9049 // 12: jalr t0
9050 // 16: <StaticChainOffset>
9051 // 24: <FunctionAddressOffset>
9052 // 32:
9053 // Offset with branch control flow protection enabled:
9054 // 0: lpad <imm20>
9055 // 4: auipc t3, 0
9056 // 8: ld t2, 28(t3)
9057 // 12: ld t3, 20(t3)
9058 // 16: jalr t2
9059 // 20: <StaticChainOffset>
9060 // 28: <FunctionAddressOffset>
9061 // 36:
9062
9063 const bool HasCFBranch =
9064 Subtarget.hasStdExtZicfilp() &&
// NOTE(review): a source line appears to be missing from this excerpt here
// (the lookup whose argument is the string literal below, presumably the
// "cf-protection-branch" module flag) -- confirm against upstream.
9066 "cf-protection-branch");
// With Zicfilp an extra leading lpad instruction shifts the data fields by 4.
9067 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
9068 const unsigned StaticChainOffset = StaticChainIdx * 4;
9069 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
9070
9071 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
9072 assert(STI);
// Encode a single MCInst and read back its little-endian 32-bit encoding.
9073 auto GetEncoding = [&](const MCInst &MC) {
// NOTE(review): the local declarations of CB and Fixups appear to be missing
// from this excerpt -- confirm against upstream.
9076 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
9077 uint32_t Encoding = support::endian::read32le(CB.data());
9078 return Encoding;
9079 };
9080
9081 SmallVector<SDValue> OutChains;
9082
// Pre-encoded 32-bit instruction words to store into the buffer, in order.
9083 SmallVector<uint32_t> Encodings;
9084 if (!HasCFBranch) {
9085 Encodings.append(
9086 {// auipc t2, 0
9087 // Loads the current PC into t2.
9088 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
9089 // ld t0, 24(t2)
9090 // Loads the function address into t0. Note that we are using offsets
9091 // pc-relative to the first instruction of the trampoline.
9092 GetEncoding(MCInstBuilder(RISCV::LD)
9093 .addReg(RISCV::X5)
9094 .addReg(RISCV::X7)
9095 .addImm(FunctionAddressOffset)),
9096 // ld t2, 16(t2)
9097 // Load the value of the static chain.
9098 GetEncoding(MCInstBuilder(RISCV::LD)
9099 .addReg(RISCV::X7)
9100 .addReg(RISCV::X7)
9101 .addImm(StaticChainOffset)),
9102 // jalr t0
9103 // Jump to the function.
9104 GetEncoding(MCInstBuilder(RISCV::JALR)
9105 .addReg(RISCV::X0)
9106 .addReg(RISCV::X5)
9107 .addImm(0))});
9108 } else {
9109 Encodings.append(
9110 {// auipc x0, <imm20> (lpad <imm20>)
9111 // Landing pad.
9112 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
9113 // auipc t3, 0
9114 // Loads the current PC into t3.
9115 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
9116 // ld t2, (FunctionAddressOffset - 4)(t3)
9117 // Loads the function address into t2. Note that we are using offsets
9118 // pc-relative to the SECOND instruction of the trampoline.
9119 GetEncoding(MCInstBuilder(RISCV::LD)
9120 .addReg(RISCV::X7)
9121 .addReg(RISCV::X28)
9122 .addImm(FunctionAddressOffset - 4)),
9123 // ld t3, (StaticChainOffset - 4)(t3)
9124 // Load the value of the static chain.
9125 GetEncoding(MCInstBuilder(RISCV::LD)
9126 .addReg(RISCV::X28)
9127 .addReg(RISCV::X28)
9128 .addImm(StaticChainOffset - 4)),
9129 // jalr t2
9130 // Software-guarded jump to the function.
9131 GetEncoding(MCInstBuilder(RISCV::JALR)
9132 .addReg(RISCV::X0)
9133 .addReg(RISCV::X7)
9134 .addImm(0))});
9135 }
9136
9137 // Store encoded instructions.
// Each 32-bit word is stored as a truncating i64->i32 store at Idx*4.
9138 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
9139 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
9140 DAG.getConstant(Idx * 4, dl, MVT::i64))
9141 : Trmp;
9142 OutChains.push_back(DAG.getTruncStore(
9143 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
9144 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
9145 }
9146
9147 // Now store the variable part of the trampoline.
9148 SDValue FunctionAddress = Op.getOperand(2);
9149 SDValue StaticChain = Op.getOperand(3);
9150
9151 // Store the given static chain and function pointer in the trampoline buffer.
9152 struct OffsetValuePair {
9153 const unsigned Offset;
9154 const SDValue Value;
9155 SDValue Addr = SDValue(); // Used to cache the address.
9156 } OffsetValues[] = {
9157 {StaticChainOffset, StaticChain},
9158 {FunctionAddressOffset, FunctionAddress},
9159 };
9160 for (auto &OffsetValue : OffsetValues) {
9161 SDValue Addr =
9162 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
9163 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
9164 OffsetValue.Addr = Addr;
9165 OutChains.push_back(
9166 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
9167 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
9168 }
9169
// Expect one store per encoded instruction plus the two data slots.
9170 assert(OutChains.size() == StaticChainIdx + 2 &&
9171 "Size of OutChains mismatch");
9172 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
9173
9174 // The end of instructions of trampoline is the same as the static chain
9175 // address that we computed earlier.
9176 SDValue EndOfTrmp = OffsetValues[0].Addr;
9177
9178 // Call clear cache on the trampoline instructions.
9179 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
9180 Trmp, EndOfTrmp);
9181
9182 return Chain;
9183}
9184
9185SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
9186 SelectionDAG &DAG) const {
9187 if (!Subtarget.is64Bit())
9188 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
9189
9190 return Op.getOperand(0);
9191}
9192
9193SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
9194 SelectionDAG &DAG) const {
9195 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
9196 // TODO: There are many other sub-cases we could potentially lower, are
9197 // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
9198 SDLoc DL(Op);
9199 MVT VT = Op.getSimpleValueType();
9200 SDValue Accum = Op.getOperand(0);
9201 assert(Accum.getSimpleValueType() == VT &&
9202 VT.getVectorElementType() == MVT::i32);
9203 SDValue A = Op.getOperand(1);
9204 SDValue B = Op.getOperand(2);
9205 MVT ArgVT = A.getSimpleValueType();
9206 assert(ArgVT == B.getSimpleValueType() &&
9207 ArgVT.getVectorElementType() == MVT::i8);
9208 (void)ArgVT;
9209
9210 // The zvqdotq pseudos are defined with sources and destination both
9211 // being i32. This cast is needed for correctness to avoid incorrect
9212 // .vx matching of i8 splats.
9213 A = DAG.getBitcast(VT, A);
9214 B = DAG.getBitcast(VT, B);
9215
9216 MVT ContainerVT = VT;
9217 if (VT.isFixedLengthVector()) {
9218 ContainerVT = getContainerForFixedLengthVector(VT);
9219 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
9220 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
9221 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
9222 }
9223
9224 unsigned Opc;
9225 switch (Op.getOpcode()) {
9227 Opc = RISCVISD::VQDOT_VL;
9228 break;
9230 Opc = RISCVISD::VQDOTU_VL;
9231 break;
9233 Opc = RISCVISD::VQDOTSU_VL;
9234 break;
9235 default:
9236 llvm_unreachable("Unexpected opcode");
9237 }
9238 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
9239 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
9240 if (VT.isFixedLengthVector())
9241 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
9242 return Res;
9243}
9244
9246 SelectionDAG &DAG, unsigned Flags) {
9247 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
9248}
9249
9251 SelectionDAG &DAG, unsigned Flags) {
9252 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
9253 Flags);
9254}
9255
9257 SelectionDAG &DAG, unsigned Flags) {
9258 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
9259 N->getOffset(), Flags);
9260}
9261
9263 SelectionDAG &DAG, unsigned Flags) {
9264 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
9265}
9266
9268 EVT Ty, SelectionDAG &DAG) {
9270 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
9271 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
9272 return DAG.getLoad(
9273 Ty, DL, DAG.getEntryNode(), LC,
9275}
9276
9278 EVT Ty, SelectionDAG &DAG) {
9280 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
9281 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
9282 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
9283 return DAG.getLoad(
9284 Ty, DL, DAG.getEntryNode(), LC,
9286}
9287
9288template <class NodeTy>
9289SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
9290 bool IsLocal, bool IsExternWeak) const {
9291 SDLoc DL(N);
9292 EVT Ty = getPointerTy(DAG.getDataLayout());
9293
9294 // When HWASAN is used and tagging of global variables is enabled
9295 // they should be accessed via the GOT, since the tagged address of a global
9296 // is incompatible with existing code models. This also applies to non-pic
9297 // mode.
9298 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
9299 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9300 if (IsLocal && !Subtarget.allowTaggedGlobals())
9301 // Use PC-relative addressing to access the symbol. This generates the
9302 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
9303 // %pcrel_lo(auipc)).
9304 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9305
9306 // Use PC-relative addressing to access the GOT for this symbol, then load
9307 // the address from the GOT. This generates the pattern (PseudoLGA sym),
9308 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9309 SDValue Load =
9310 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9311 MachineFunction &MF = DAG.getMachineFunction();
9312 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9316 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9317 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9318 return Load;
9319 }
9320
9321 switch (getTargetMachine().getCodeModel()) {
9322 default:
9323 reportFatalUsageError("Unsupported code model for lowering");
9324 case CodeModel::Small: {
9325 // Generate a sequence for accessing addresses within the first 2 GiB of
9326 // address space.
9327 if (Subtarget.hasVendorXqcili()) {
9328 // Use QC.E.LI to generate the address, as this is easier to relax than
9329 // LUI/ADDI.
9330 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9331 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
9332 }
9333
9334 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
9335 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
9336 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
9337 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9338 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
9339 }
9340 case CodeModel::Medium: {
9341 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9342 if (IsExternWeak) {
9343 // An extern weak symbol may be undefined, i.e. have value 0, which may
9344 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
9345 // symbol. This generates the pattern (PseudoLGA sym), which expands to
9346 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9347 SDValue Load =
9348 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9349 MachineFunction &MF = DAG.getMachineFunction();
9350 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9354 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9355 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9356 return Load;
9357 }
9358
9359 // Generate a sequence for accessing addresses within any 2GiB range within
9360 // the address space. This generates the pattern (PseudoLLA sym), which
9361 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
9362 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9363 }
9364 case CodeModel::Large: {
9365 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
9366 return getLargeGlobalAddress(G, DL, Ty, DAG);
9367
9368 // Using pc-relative mode for other node type.
9369 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9370 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9371 }
9372 }
9373}
9374
9375SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
9376 SelectionDAG &DAG) const {
9377 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9378 assert(N->getOffset() == 0 && "unexpected offset in global node");
9379 const GlobalValue *GV = N->getGlobal();
9380 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
9381}
9382
9383SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
9384 SelectionDAG &DAG) const {
9385 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
9386
9387 return getAddr(N, DAG);
9388}
9389
9390SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
9391 SelectionDAG &DAG) const {
9392 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
9393
9394 return getAddr(N, DAG);
9395}
9396
9397SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
9398 SelectionDAG &DAG) const {
9399 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
9400
9401 return getAddr(N, DAG);
9402}
9403
9404SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
9405 SelectionDAG &DAG,
9406 bool UseGOT) const {
9407 SDLoc DL(N);
9408 EVT Ty = getPointerTy(DAG.getDataLayout());
9409 const GlobalValue *GV = N->getGlobal();
9410 MVT XLenVT = Subtarget.getXLenVT();
9411
9412 if (UseGOT) {
9413 // Use PC-relative addressing to access the GOT for this TLS symbol, then
9414 // load the address from the GOT and add the thread pointer. This generates
9415 // the pattern (PseudoLA_TLS_IE sym), which expands to
9416 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
9417 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9418 SDValue Load =
9419 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
9420 MachineFunction &MF = DAG.getMachineFunction();
9421 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9425 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9426 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9427
9428 // Add the thread pointer.
9429 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9430 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9431 }
9432
9433 // Generate a sequence for accessing the address relative to the thread
9434 // pointer, with the appropriate adjustment for the thread pointer offset.
9435 // This generates the pattern
9436 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
9437 SDValue AddrHi =
9439 SDValue AddrAdd =
9441 SDValue AddrLo =
9443
9444 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9445 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9446 SDValue MNAdd =
9447 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9448 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9449}
9450
9451SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9452 SelectionDAG &DAG) const {
9453 SDLoc DL(N);
9454 EVT Ty = getPointerTy(DAG.getDataLayout());
9455 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9456 const GlobalValue *GV = N->getGlobal();
9457
9458 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9459 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9460 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9461 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9462 SDValue Load =
9463 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9464
9465 // Prepare argument list to generate call.
9467 Args.emplace_back(Load, CallTy);
9468
9469 // Setup call to __tls_get_addr.
9470 TargetLowering::CallLoweringInfo CLI(DAG);
9471 CLI.setDebugLoc(DL)
9472 .setChain(DAG.getEntryNode())
9473 .setLibCallee(CallingConv::C, CallTy,
9474 DAG.getExternalSymbol("__tls_get_addr", Ty),
9475 std::move(Args));
9476
9477 return LowerCallTo(CLI).first;
9478}
9479
9480SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9481 SelectionDAG &DAG) const {
9482 SDLoc DL(N);
9483 EVT Ty = getPointerTy(DAG.getDataLayout());
9484 const GlobalValue *GV = N->getGlobal();
9485
9486 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9487 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9488 //
9489 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9490 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9491 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9492 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9493 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9494 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9495}
9496
9497SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9498 SelectionDAG &DAG) const {
9499 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9500 assert(N->getOffset() == 0 && "unexpected offset in global node");
9501
9502 if (DAG.getTarget().useEmulatedTLS())
9503 return LowerToTLSEmulatedModel(N, DAG);
9504
9506
9509 reportFatalUsageError("In GHC calling convention TLS is not supported");
9510
9511 SDValue Addr;
9512 switch (Model) {
9514 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9515 break;
9517 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9518 break;
9521 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9522 : getDynamicTLSAddr(N, DAG);
9523 break;
9524 }
9525
9526 return Addr;
9527}
9528
9529// Return true if Val is equal to (setcc LHS, RHS, CC).
9530// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9531// Otherwise, return std::nullopt.
9532static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9533 ISD::CondCode CC, SDValue Val) {
9534 assert(Val->getOpcode() == ISD::SETCC);
9535 SDValue LHS2 = Val.getOperand(0);
9536 SDValue RHS2 = Val.getOperand(1);
9537 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9538
9539 if (LHS == LHS2 && RHS == RHS2) {
9540 if (CC == CC2)
9541 return true;
9542 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9543 return false;
9544 } else if (LHS == RHS2 && RHS == LHS2) {
9546 if (CC == CC2)
9547 return true;
9548 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9549 return false;
9550 }
9551
9552 return std::nullopt;
9553}
9554
9556 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9557}
9558
9560 const RISCVSubtarget &Subtarget) {
9561 SDValue CondV = N->getOperand(0);
9562 SDValue TrueV = N->getOperand(1);
9563 SDValue FalseV = N->getOperand(2);
9564 MVT VT = N->getSimpleValueType(0);
9565 SDLoc DL(N);
9566
9567 if (!Subtarget.hasConditionalMoveFusion()) {
9568 // (select c, -1, y) -> -c | y
9569 if (isAllOnesConstant(TrueV)) {
9570 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9571 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9572 }
9573 // (select c, y, -1) -> (c-1) | y
9574 if (isAllOnesConstant(FalseV)) {
9575 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9576 DAG.getAllOnesConstant(DL, VT));
9577 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9578 }
9579
9580 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9581
9582 // (select c, 0, y) -> (c-1) & y
9583 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9584 SDValue Neg =
9585 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9586 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9587 }
9588 if (isNullConstant(FalseV)) {
9589 // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
9590 if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9591 uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9592 if (isPowerOf2_64(TrueM1)) {
9593 unsigned ShAmount = Log2_64(TrueM1);
9594 if (Subtarget.hasShlAdd(ShAmount))
9595 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9596 DAG.getTargetConstant(ShAmount, DL, VT), CondV);
9597 }
9598 }
9599 // (select c, y, 0) -> -c & y
9600 if (!HasCZero || isSimm12Constant(TrueV)) {
9601 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9602 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9603 }
9604 }
9605 }
9606
9607 // select c, ~x, x --> xor -c, x
9608 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9609 const APInt &TrueVal = TrueV->getAsAPIntVal();
9610 const APInt &FalseVal = FalseV->getAsAPIntVal();
9611 if (~TrueVal == FalseVal) {
9612 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9613 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9614 }
9615 }
9616
9617 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9618 // when both truev and falsev are also setcc.
9619 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9620 FalseV.getOpcode() == ISD::SETCC) {
9621 SDValue LHS = CondV.getOperand(0);
9622 SDValue RHS = CondV.getOperand(1);
9623 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9624
9625 // (select x, x, y) -> x | y
9626 // (select !x, x, y) -> x & y
9627 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9628 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9629 DAG.getFreeze(FalseV));
9630 }
9631 // (select x, y, x) -> x & y
9632 // (select !x, y, x) -> x | y
9633 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9634 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9635 DAG.getFreeze(TrueV), FalseV);
9636 }
9637 }
9638
9639 return SDValue();
9640}
9641
9642// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9643// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9644// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
9645// being `0` or `-1`. In such cases we can replace `select` with `and`.
9646// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9647// than `c0`?
9648static SDValue
9650 const RISCVSubtarget &Subtarget) {
9651 if (Subtarget.hasShortForwardBranchIALU())
9652 return SDValue();
9653
9654 unsigned SelOpNo = 0;
9655 SDValue Sel = BO->getOperand(0);
9656 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9657 SelOpNo = 1;
9658 Sel = BO->getOperand(1);
9659 }
9660
9661 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9662 return SDValue();
9663
9664 unsigned ConstSelOpNo = 1;
9665 unsigned OtherSelOpNo = 2;
9666 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9667 ConstSelOpNo = 2;
9668 OtherSelOpNo = 1;
9669 }
9670 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9671 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9672 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9673 return SDValue();
9674
9675 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9676 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9677 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9678 return SDValue();
9679
9680 SDLoc DL(Sel);
9681 EVT VT = BO->getValueType(0);
9682
9683 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9684 if (SelOpNo == 1)
9685 std::swap(NewConstOps[0], NewConstOps[1]);
9686
9687 SDValue NewConstOp =
9688 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9689 if (!NewConstOp)
9690 return SDValue();
9691
9692 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9693 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9694 return SDValue();
9695
9696 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9697 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9698 if (SelOpNo == 1)
9699 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9700 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9701
9702 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9703 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9704 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9705}
9706
9707// Returns true if VT is a P extension packed SIMD type that fits in XLen.
9708static bool isPExtPackedType(MVT VT, const RISCVSubtarget &Subtarget) {
9709 if (!Subtarget.enablePExtSIMDCodeGen())
9710 return false;
9711
9712 if (Subtarget.is64Bit())
9713 return VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32;
9714 return VT == MVT::v4i8 || VT == MVT::v2i16;
9715}
9716
9717SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9718 SDValue CondV = Op.getOperand(0);
9719 SDValue TrueV = Op.getOperand(1);
9720 SDValue FalseV = Op.getOperand(2);
9721 SDLoc DL(Op);
9722 MVT VT = Op.getSimpleValueType();
9723 MVT XLenVT = Subtarget.getXLenVT();
9724
9725 // Handle P extension packed types by bitcasting to XLenVT for selection,
9726 // e.g. select i1 %cond, <2 x i16> %TrueV, <2 x i16> %FalseV
9727 // These types fit in a single GPR so can use the same selection mechanism
9728 // as scalars.
9729 if (isPExtPackedType(VT, Subtarget)) {
9730 SDValue TrueVInt = DAG.getBitcast(XLenVT, TrueV);
9731 SDValue FalseVInt = DAG.getBitcast(XLenVT, FalseV);
9732 SDValue ResultInt =
9733 DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
9734 return DAG.getBitcast(VT, ResultInt);
9735 }
9736
9737 // Lower vector SELECTs to VSELECTs by splatting the condition.
9738 if (VT.isVector()) {
9739 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9740 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9741 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9742 }
9743
9744 // Try some other optimizations before falling back to generic lowering.
9745 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9746 return V;
9747
9748 // When there is no cost for GPR <-> FPR, we can use zicond select for
9749 // floating value when CondV is int type
9750 bool FPinGPR = Subtarget.hasStdExtZfinx();
9751
9752 // We can handle FGPR without spliting into hi/lo parts
9753 bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(),
9754 Subtarget.getXLenVT().getSizeInBits());
9755
9756 bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR &&
9757 VT.isFloatingPoint() && FitsInGPR;
9758
9759 if (UseZicondForFPSel) {
9760
9761 auto CastToInt = [&](SDValue V) -> SDValue {
9762 // Treat +0.0 as int 0 to enable single 'czero' instruction generation.
9763 if (isNullFPConstant(V))
9764 return DAG.getConstant(0, DL, XLenVT);
9765
9766 if (VT == MVT::f16)
9767 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V);
9768
9769 if (VT == MVT::f32 && Subtarget.is64Bit())
9770 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V);
9771
9772 return DAG.getBitcast(XLenVT, V);
9773 };
9774
9775 SDValue TrueVInt = CastToInt(TrueV);
9776 SDValue FalseVInt = CastToInt(FalseV);
9777
9778 // Emit integer SELECT (lowers to Zicond)
9779 SDValue ResultInt =
9780 DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
9781
9782 // Convert back to floating VT
9783 if (VT == MVT::f32 && Subtarget.is64Bit())
9784 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt);
9785
9786 if (VT == MVT::f16)
9787 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt);
9788
9789 return DAG.getBitcast(VT, ResultInt);
9790 }
9791
9792 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9793 // nodes to implement the SELECT. Performing the lowering here allows for
9794 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9795 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9796 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9797
9798 // (select c, t, 0) -> (czero_eqz t, c)
9799 if (isNullConstant(FalseV))
9800 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9801 // (select c, 0, f) -> (czero_nez f, c)
9802 if (isNullConstant(TrueV))
9803 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9804
9805 // Check to see if a given operation is a 'NOT', if so return the negated
9806 // operand
9807 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9808 using namespace llvm::SDPatternMatch;
9809 SDValue Xor;
9810 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9811 return Xor;
9812 }
9813 return std::nullopt;
9814 };
9815 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9816 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9817 if (TrueV.getOpcode() == ISD::AND &&
9818 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9819 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9820 ? getNotOperand(TrueV.getOperand(1))
9821 : getNotOperand(TrueV.getOperand(0));
9822 if (NotOperand) {
9823 SDValue CMOV =
9824 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9825 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9826 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9827 }
9828 return DAG.getNode(
9829 ISD::OR, DL, VT, TrueV,
9830 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9831 }
9832
9833 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9834 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9835 if (FalseV.getOpcode() == ISD::AND &&
9836 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9837 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9838 ? getNotOperand(FalseV.getOperand(1))
9839 : getNotOperand(FalseV.getOperand(0));
9840 if (NotOperand) {
9841 SDValue CMOV =
9842 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9843 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9844 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9845 }
9846 return DAG.getNode(
9847 ISD::OR, DL, VT, FalseV,
9848 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9849 }
9850
9851 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9852 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9853 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9854 const APInt &TrueVal = TrueV->getAsAPIntVal();
9855 const APInt &FalseVal = FalseV->getAsAPIntVal();
9856
9857 // Prefer these over Zicond to avoid materializing an immediate:
9858 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9859 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
9860 if (CondV.getOpcode() == ISD::SETCC &&
9861 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9862 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9863 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9864 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9865 int64_t TrueImm = TrueVal.getSExtValue();
9866 int64_t FalseImm = FalseVal.getSExtValue();
9867 if (CCVal == ISD::SETGT)
9868 std::swap(TrueImm, FalseImm);
9869 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9870 isInt<12>(TrueImm - FalseImm)) {
9871 SDValue SRA =
9872 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9873 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9874 SDValue AND =
9875 DAG.getNode(ISD::AND, DL, VT, SRA,
9876 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9877 return DAG.getNode(ISD::ADD, DL, VT, AND,
9878 DAG.getSignedConstant(FalseImm, DL, VT));
9879 }
9880 }
9881 }
9882
9883 // Use SHL/ADDI (and possible XORI) to avoid having to materialize
9884 // a constant in register
9885 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9886 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9887 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9888 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9889 }
9890 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9891 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9892 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9893 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9894 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9895 }
9896
9897 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9898 const int DeltaCost = RISCVMatInt::getIntMatCost(
9899 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9900 // Does the addend fold into an ADDI
9901 if (Addend.isSignedIntN(12))
9902 return DeltaCost;
9903 const int AddendCost = RISCVMatInt::getIntMatCost(
9904 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9905 return AddendCost + DeltaCost;
9906 };
9907 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9908 getCost(TrueVal - FalseVal, FalseVal);
9909 SDValue LHSVal = DAG.getConstant(
9910 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9911 SDValue CMOV =
9912 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9913 DL, VT, LHSVal, CondV);
9914 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9915 }
9916
9917 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9918 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9919 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9920 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9921 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9922 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9923 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9924 // Efficient only if the constant and its negation fit into `ADDI`
9925 // Prefer Add/Sub over Xor since can be compressed for small immediates
9926 if (isInt<12>(RawConstVal)) {
9927 // Fall back to XORI if Const == -0x800 since we don't have SUBI.
9928 unsigned SubOpc = (RawConstVal == -0x800) ? ISD::XOR : ISD::SUB;
9929 unsigned AddOpc = (RawConstVal == -0x800) ? ISD::XOR : ISD::ADD;
9930 SDValue SubOp = DAG.getNode(SubOpc, DL, VT, RegV, ConstVal);
9931 SDValue CZERO =
9932 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9933 DL, VT, SubOp, CondV);
9934 return DAG.getNode(AddOpc, DL, VT, CZERO, ConstVal);
9935 }
9936 }
9937
9938 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9939 // Unless we have the short forward branch optimization.
9940 if (!Subtarget.hasConditionalMoveFusion())
9941 return DAG.getNode(
9942 ISD::OR, DL, VT,
9943 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9944 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9946 }
9947
9948 if (Op.hasOneUse()) {
9949 unsigned UseOpc = Op->user_begin()->getOpcode();
9950 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9951 SDNode *BinOp = *Op->user_begin();
9952 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9953 DAG, Subtarget)) {
9954 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9955 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9956 // may return a constant node and cause crash in lowerSELECT.
9957 if (NewSel.getOpcode() == ISD::SELECT)
9958 return lowerSELECT(NewSel, DAG);
9959 return NewSel;
9960 }
9961 }
9962 }
9963
9964 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9965 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9966 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9967 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9968 if (FPTV && FPFV) {
9969 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9970 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9971 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9972 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9973 DAG.getConstant(1, DL, XLenVT));
9974 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9975 }
9976 }
9977
9978 // If the condition is not an integer SETCC which operates on XLenVT, we need
9979 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9980 // (select condv, truev, falsev)
9981 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9982 if (CondV.getOpcode() != ISD::SETCC ||
9983 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9984 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9985 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9986
9987 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9988
9989 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9990 }
9991
9992 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9993 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9994 // advantage of the integer compare+branch instructions. i.e.:
9995 // (select (setcc lhs, rhs, cc), truev, falsev)
9996 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9997 SDValue LHS = CondV.getOperand(0);
9998 SDValue RHS = CondV.getOperand(1);
9999 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
10000
10001 // Special case for a select of 2 constants that have a difference of 1.
10002 // Normally this is done by DAGCombine, but if the select is introduced by
10003 // type legalization or op legalization, we miss it. Restricting to SETLT
10004 // case for now because that is what signed saturating add/sub need.
10005 // FIXME: We don't need the condition to be SETLT or even a SETCC,
10006 // but we would probably want to swap the true/false values if the condition
10007 // is SETGE/SETLE to avoid an XORI.
10008 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
10009 CCVal == ISD::SETLT) {
10010 const APInt &TrueVal = TrueV->getAsAPIntVal();
10011 const APInt &FalseVal = FalseV->getAsAPIntVal();
10012 if (TrueVal - 1 == FalseVal)
10013 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
10014 if (TrueVal + 1 == FalseVal)
10015 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
10016 }
10017
10018 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
10019 // 1 < x ? x : 1 -> 0 < x ? x : 1
10020 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
10021 RHS == TrueV && LHS == FalseV) {
10022 LHS = DAG.getConstant(0, DL, VT);
10023 // 0 <u x is the same as x != 0.
10024 if (CCVal == ISD::SETULT) {
10025 std::swap(LHS, RHS);
10026 CCVal = ISD::SETNE;
10027 }
10028 }
10029
10030 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
10031 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
10032 RHS == FalseV) {
10033 RHS = DAG.getConstant(0, DL, VT);
10034 }
10035
10036 SDValue TargetCC = DAG.getCondCode(CCVal);
10037
10038 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
10039 // (select (setcc lhs, rhs, CC), constant, falsev)
10040 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
10041 std::swap(TrueV, FalseV);
10042 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
10043 }
10044
10045 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
10046 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
10047}
10048
10049SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
10050 SDValue CondV = Op.getOperand(1);
10051 SDLoc DL(Op);
10052 MVT XLenVT = Subtarget.getXLenVT();
10053
10054 if (CondV.getOpcode() == ISD::SETCC &&
10055 CondV.getOperand(0).getValueType() == XLenVT) {
10056 SDValue LHS = CondV.getOperand(0);
10057 SDValue RHS = CondV.getOperand(1);
10058 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
10059
10060 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
10061
10062 SDValue TargetCC = DAG.getCondCode(CCVal);
10063 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
10064 LHS, RHS, TargetCC, Op.getOperand(2));
10065 }
10066
10067 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
10068 CondV, DAG.getConstant(0, DL, XLenVT),
10069 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
10070}
10071
10072SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
10073 MachineFunction &MF = DAG.getMachineFunction();
10074 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
10075
10076 SDLoc DL(Op);
10077 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
10079
10080 // vastart just stores the address of the VarArgsFrameIndex slot into the
10081 // memory location argument.
10082 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10083 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
10084 MachinePointerInfo(SV));
10085}
10086
10087SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
10088 SelectionDAG &DAG) const {
10089 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
10090 MachineFunction &MF = DAG.getMachineFunction();
10091 MachineFrameInfo &MFI = MF.getFrameInfo();
10092 MFI.setFrameAddressIsTaken(true);
10093 Register FrameReg = RI.getFrameRegister(MF);
10094 int XLenInBytes = Subtarget.getXLen() / 8;
10095
10096 EVT VT = Op.getValueType();
10097 SDLoc DL(Op);
10098 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
10099 unsigned Depth = Op.getConstantOperandVal(0);
10100 while (Depth--) {
10101 int Offset = -(XLenInBytes * 2);
10102 SDValue Ptr = DAG.getNode(
10103 ISD::ADD, DL, VT, FrameAddr,
10105 FrameAddr =
10106 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
10107 }
10108 return FrameAddr;
10109}
10110
10111SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
10112 SelectionDAG &DAG) const {
10113 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
10114 MachineFunction &MF = DAG.getMachineFunction();
10115 MachineFrameInfo &MFI = MF.getFrameInfo();
10116 MFI.setReturnAddressIsTaken(true);
10117 MVT XLenVT = Subtarget.getXLenVT();
10118 int XLenInBytes = Subtarget.getXLen() / 8;
10119
10120 EVT VT = Op.getValueType();
10121 SDLoc DL(Op);
10122 unsigned Depth = Op.getConstantOperandVal(0);
10123 if (Depth) {
10124 int Off = -XLenInBytes;
10125 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
10126 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
10127 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
10128 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
10129 MachinePointerInfo());
10130 }
10131
10132 // Return the value of the return address register, marking it an implicit
10133 // live-in.
10134 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
10135 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
10136}
10137
10138SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
10139 SelectionDAG &DAG) const {
10140 SDLoc DL(Op);
10141 SDValue Lo = Op.getOperand(0);
10142 SDValue Hi = Op.getOperand(1);
10143 SDValue Shamt = Op.getOperand(2);
10144 EVT VT = Lo.getValueType();
10145
10146 // if Shamt-XLEN < 0: // Shamt < XLEN
10147 // Lo = Lo << Shamt
10148 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
10149 // else:
10150 // Lo = 0
10151 // Hi = Lo << (Shamt-XLEN)
10152
10153 SDValue Zero = DAG.getConstant(0, DL, VT);
10154 SDValue One = DAG.getConstant(1, DL, VT);
10155 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
10156 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
10157 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
10158 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
10159
10160 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
10161 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
10162 SDValue ShiftRightLo =
10163 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
10164 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
10165 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
10166 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
10167
10168 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
10169
10170 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
10171 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
10172
10173 SDValue Parts[2] = {Lo, Hi};
10174 return DAG.getMergeValues(Parts, DL);
10175}
10176
10177SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
10178 bool IsSRA) const {
10179 SDLoc DL(Op);
10180 SDValue Lo = Op.getOperand(0);
10181 SDValue Hi = Op.getOperand(1);
10182 SDValue Shamt = Op.getOperand(2);
10183 EVT VT = Lo.getValueType();
10184
10185 // SRA expansion:
10186 // if Shamt-XLEN < 0: // Shamt < XLEN
10187 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
10188 // Hi = Hi >>s Shamt
10189 // else:
10190 // Lo = Hi >>s (Shamt-XLEN);
10191 // Hi = Hi >>s (XLEN-1)
10192 //
10193 // SRL expansion:
10194 // if Shamt-XLEN < 0: // Shamt < XLEN
10195 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
10196 // Hi = Hi >>u Shamt
10197 // else:
10198 // Lo = Hi >>u (Shamt-XLEN);
10199 // Hi = 0;
10200
10201 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
10202
10203 SDValue Zero = DAG.getConstant(0, DL, VT);
10204 SDValue One = DAG.getConstant(1, DL, VT);
10205 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
10206 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
10207 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
10208 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
10209
10210 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
10211 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
10212 SDValue ShiftLeftHi =
10213 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
10214 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
10215 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
10216 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
10217 SDValue HiFalse =
10218 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
10219
10220 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
10221
10222 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
10223 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
10224
10225 SDValue Parts[2] = {Lo, Hi};
10226 return DAG.getMergeValues(Parts, DL);
10227}
10228
10229// Lower splats of i1 types to SETCC. For each mask vector type, we have a
10230// legal equivalently-sized i8 type, so we can use that as a go-between.
10231SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
10232 SelectionDAG &DAG) const {
10233 SDLoc DL(Op);
10234 MVT VT = Op.getSimpleValueType();
10235 SDValue SplatVal = Op.getOperand(0);
10236 // All-zeros or all-ones splats are handled specially.
10237 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
10238 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
10239 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
10240 }
10241 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
10242 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
10243 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
10244 }
10245 MVT InterVT = VT.changeVectorElementType(MVT::i8);
10246 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
10247 DAG.getConstant(1, DL, SplatVal.getValueType()));
10248 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
10249 SDValue Zero = DAG.getConstant(0, DL, InterVT);
10250 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
10251}
10252
10253// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
10254// illegal (currently only vXi64 RV32).
10255// FIXME: We could also catch non-constant sign-extended i32 values and lower
10256// them to VMV_V_X_VL.
10257SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
10258 SelectionDAG &DAG) const {
10259 SDLoc DL(Op);
10260 MVT VecVT = Op.getSimpleValueType();
10261 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
10262 "Unexpected SPLAT_VECTOR_PARTS lowering");
10263
10264 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
10265 SDValue Lo = Op.getOperand(0);
10266 SDValue Hi = Op.getOperand(1);
10267
10268 MVT ContainerVT = VecVT;
10269 if (VecVT.isFixedLengthVector())
10270 ContainerVT = getContainerForFixedLengthVector(VecVT);
10271
10272 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
10273
10274 SDValue Res =
10275 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
10276
10277 if (VecVT.isFixedLengthVector())
10278 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
10279
10280 return Res;
10281}
10282
10283// Custom-lower extensions from mask vectors by using a vselect either with 1
10284// for zero/any-extension or -1 for sign-extension:
10285// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
10286// Note that any-extension is lowered identically to zero-extension.
10287SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
10288 int64_t ExtTrueVal) const {
10289 SDLoc DL(Op);
10290 MVT VecVT = Op.getSimpleValueType();
10291 SDValue Src = Op.getOperand(0);
10292 // Only custom-lower extensions from mask types
10293 assert(Src.getValueType().isVector() &&
10294 Src.getValueType().getVectorElementType() == MVT::i1);
10295
10296 if (VecVT.isScalableVector()) {
10297 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
10298 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
10299 if (Src.getOpcode() == ISD::XOR &&
10300 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
10301 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
10302 SplatTrueVal);
10303 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
10304 }
10305
10306 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
10307 MVT I1ContainerVT =
10308 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10309
10310 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
10311
10312 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
10313
10314 MVT XLenVT = Subtarget.getXLenVT();
10315 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
10316 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
10317
10318 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10319 SDValue Xor = Src.getOperand(0);
10320 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
10321 SDValue ScalableOnes = Xor.getOperand(1);
10322 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
10323 ScalableOnes.getOperand(0).isUndef() &&
10325 ScalableOnes.getOperand(1).getNode())) {
10326 CC = Xor.getOperand(0);
10327 std::swap(SplatZero, SplatTrueVal);
10328 }
10329 }
10330 }
10331
10332 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10333 DAG.getUNDEF(ContainerVT), SplatZero, VL);
10334 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10335 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
10336 SDValue Select =
10337 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
10338 SplatZero, DAG.getUNDEF(ContainerVT), VL);
10339
10340 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
10341}
10342
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
//
// Handles both ISD::TRUNCATE and ISD::VP_TRUNCATE; the VP form supplies its
// own mask and VL operands, otherwise the defaults for the type are used.
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  // For the VP form, mask and VL come from the node's own operands.
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      // The VP mask operand must be converted to its container type too.
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  // Non-VP truncates use the default mask and VL for the source type.
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  // Keep only the low bit of each element, then compare it against zero:
  // (and vec, 1) != 0.
  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                              DAG.getUNDEF(ContainerVT), Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
  // Convert back to the original fixed-length mask type if necessary.
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}
10397
10398SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
10399 SelectionDAG &DAG) const {
10400 unsigned Opc = Op.getOpcode();
10401 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
10402 SDLoc DL(Op);
10403
10404 MVT VT = Op.getSimpleValueType();
10405 // Only custom-lower vector truncates
10406 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10407
10408 // Truncates to mask types are handled differently
10409 if (VT.getVectorElementType() == MVT::i1)
10410 return lowerVectorMaskTruncLike(Op, DAG);
10411
10412 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
10413 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
10414 // truncate by one power of two at a time.
10415 MVT DstEltVT = VT.getVectorElementType();
10416
10417 SDValue Src = Op.getOperand(0);
10418 MVT SrcVT = Src.getSimpleValueType();
10419 MVT SrcEltVT = SrcVT.getVectorElementType();
10420
10421 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
10422 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
10423 "Unexpected vector truncate lowering");
10424
10425 MVT ContainerVT = SrcVT;
10426 SDValue Mask, VL;
10427 if (IsVPTrunc) {
10428 Mask = Op.getOperand(1);
10429 VL = Op.getOperand(2);
10430 }
10431 if (SrcVT.isFixedLengthVector()) {
10432 ContainerVT = getContainerForFixedLengthVector(SrcVT);
10433 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
10434 if (IsVPTrunc) {
10435 MVT MaskVT = getMaskTypeFor(ContainerVT);
10436 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10437 }
10438 }
10439
10440 SDValue Result = Src;
10441 if (!IsVPTrunc) {
10442 std::tie(Mask, VL) =
10443 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10444 }
10445
10446 unsigned NewOpc;
10448 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
10449 else if (Opc == ISD::TRUNCATE_USAT_U)
10450 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
10451 else
10452 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
10453
10454 do {
10455 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
10456 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
10457 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
10458 } while (SrcEltVT != DstEltVT);
10459
10460 if (SrcVT.isFixedLengthVector())
10461 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10462
10463 return Result;
10464}
10465
// Lower STRICT_FP_EXTEND / STRICT_FP_ROUND on vectors. RVV converts FP
// elements only between adjacent power-of-two widths, so f16/bf16 <-> f64
// goes through an intermediate f32 vector; the narrowing side of that
// two-step path uses round-to-odd to make the double rounding correct.
// The strict-FP chain operand is threaded through both conversion steps.
SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  // Fixed-length vectors are converted to their scalable container type; the
  // result container keeps the source's element count with the result's
  // element type.
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size as the source.
  if ((VT.getVectorElementType() == MVT::f64 &&
       (SrcVT.getVectorElementType() == MVT::f16 ||
        SrcVT.getVectorElementType() == MVT::bf16)) ||
      ((VT.getVectorElementType() == MVT::f16 ||
        VT.getVectorElementType() == MVT::bf16) &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                                ? RISCVISD::STRICT_FP_EXTEND_VL
                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    // Continue the strict-FP chain from the intermediate node.
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                         ? RISCVISD::STRICT_FP_EXTEND_VL
                         : RISCVISD::STRICT_FP_ROUND_VL;
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values. Their lowered result should
    // have same result count.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
10514
// Lower vector FP_EXTEND / FP_ROUND and their VP_ variants. Single-step
// ("direct") conversions become one FP_EXTEND_VL/FP_ROUND_VL node; the
// f16/bf16 <-> f64 cases go through an intermediate f32 vector, with the
// narrowing path using the round-to-odd conversion (VFNCVT_ROD_VL).
SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  // "Direct" means a single widening/narrowing step, i.e. not between
  // f16/bf16 and f64.
  bool IsDirectExtend =
      IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                   (SrcVT.getVectorElementType() != MVT::f16 &&
                    SrcVT.getVectorElementType() != MVT::bf16));
  bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
                                      VT.getVectorElementType() != MVT::bf16) ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // We have regular SD node patterns for direct non-VL extends.
  if (VT.isScalableVector() && IsDirectConv && !IsVP)
    return Op;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  // VP nodes carry their own mask and VL operands.
  if (IsVP) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  // Non-VP nodes use the default mask/VL for the source type.
  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  // Two-step conversion via f32; narrowing uses round-to-odd on the first
  // step so the final rounding is correct.
  unsigned InterConvOpc =
      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}
10590
10591// Given a scalable vector type and an index into it, returns the type for the
10592// smallest subvector that the index fits in. This can be used to reduce LMUL
10593// for operations like vslidedown.
10594//
10595// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10596static std::optional<MVT>
10597getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10598 const RISCVSubtarget &Subtarget) {
10599 assert(VecVT.isScalableVector());
10600 const unsigned EltSize = VecVT.getScalarSizeInBits();
10601 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10602 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10603 MVT SmallerVT;
10604 if (MaxIdx < MinVLMAX)
10605 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10606 else if (MaxIdx < MinVLMAX * 2)
10607 SmallerVT =
10609 else if (MaxIdx < MinVLMAX * 4)
10610 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10613 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10614 return std::nullopt;
10615 return SmallerVT;
10616}
10617
10619 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10620 if (!IdxC || isNullConstant(Idx))
10621 return false;
10622 return isUInt<5>(IdxC->getZExtValue());
10623}
10624
10625// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10626// first position of a vector, and that vector is slid up to the insert index.
10627// By limiting the active vector length to index+1 and merging with the
10628// original vector (with an undisturbed tail policy for elements >= VL), we
10629// achieve the desired result of leaving all elements untouched except the one
10630// at VL-1, which is replaced with the desired value.
10631SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10632 SelectionDAG &DAG) const {
10633 SDLoc DL(Op);
10634 MVT VecVT = Op.getSimpleValueType();
10635 MVT XLenVT = Subtarget.getXLenVT();
10636 SDValue Vec = Op.getOperand(0);
10637 SDValue Val = Op.getOperand(1);
10638 MVT ValVT = Val.getSimpleValueType();
10639 SDValue Idx = Op.getOperand(2);
10640
10641 if (VecVT.getVectorElementType() == MVT::i1) {
10642 // FIXME: For now we just promote to an i8 vector and insert into that,
10643 // but this is probably not optimal.
10644 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10645 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10646 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10647 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10648 }
10649
10650 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10651 (ValVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
10652 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10653 MVT IntVT = VecVT.changeTypeToInteger();
10654 SDValue IntInsert = DAG.getNode(
10655 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10656 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10657 return DAG.getBitcast(VecVT, IntInsert);
10658 }
10659
10660 MVT ContainerVT = VecVT;
10661 // If the operand is a fixed-length vector, convert to a scalable one.
10662 if (VecVT.isFixedLengthVector()) {
10663 ContainerVT = getContainerForFixedLengthVector(VecVT);
10664 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10665 }
10666
10667 // If we know the index we're going to insert at, we can shrink Vec so that
10668 // we're performing the scalar inserts and slideup on a smaller LMUL.
10669 SDValue OrigVec = Vec;
10670 std::optional<unsigned> AlignedIdx;
10671 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10672 const unsigned OrigIdx = IdxC->getZExtValue();
10673 // Do we know an upper bound on LMUL?
10674 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10675 DL, DAG, Subtarget)) {
10676 ContainerVT = *ShrunkVT;
10677 AlignedIdx = 0;
10678 }
10679
10680 // If we're compiling for an exact VLEN value, we can always perform
10681 // the insert in m1 as we can determine the register corresponding to
10682 // the index in the register group.
10683 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10684 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10685 EVT ElemVT = VecVT.getVectorElementType();
10686 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10687 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10688 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10689 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10690 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10691 ContainerVT = M1VT;
10692 }
10693
10694 if (AlignedIdx)
10695 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10696 }
10697
10698 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10699 // Even i64-element vectors on RV32 can be lowered without scalar
10700 // legalization if the most-significant 32 bits of the value are not affected
10701 // by the sign-extension of the lower 32 bits.
10702 // TODO: We could also catch sign extensions of a 32-bit value.
10703 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10704 const auto *CVal = cast<ConstantSDNode>(Val);
10705 if (isInt<32>(CVal->getSExtValue())) {
10706 IsLegalInsert = true;
10707 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10708 }
10709 }
10710
10711 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10712
10713 SDValue ValInVec;
10714
10715 if (IsLegalInsert) {
10716 unsigned Opc =
10717 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10718 if (isNullConstant(Idx)) {
10719 if (!VecVT.isFloatingPoint())
10720 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10721 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10722
10723 if (AlignedIdx)
10724 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10725 if (!VecVT.isFixedLengthVector())
10726 return Vec;
10727 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10728 }
10729
10730 // Use ri.vinsert.v.x if available.
10731 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10733 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10734 SDValue PolicyOp =
10736 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10737 VL, PolicyOp);
10738 if (AlignedIdx)
10739 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10740 if (!VecVT.isFixedLengthVector())
10741 return Vec;
10742 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10743 }
10744
10745 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10746 } else {
10747 // On RV32, i64-element vectors must be specially handled to place the
10748 // value at element 0, by using two vslide1down instructions in sequence on
10749 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10750 // this.
10751 SDValue ValLo, ValHi;
10752 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10753 MVT I32ContainerVT =
10754 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10755 SDValue I32Mask =
10756 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10757 // Limit the active VL to two.
10758 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10759 // If the Idx is 0 we can insert directly into the vector.
10760 if (isNullConstant(Idx)) {
10761 // First slide in the lo value, then the hi in above it. We use slide1down
10762 // to avoid the register group overlap constraint of vslide1up.
10763 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10764 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10765 // If the source vector is undef don't pass along the tail elements from
10766 // the previous slide1down.
10767 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10768 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10769 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10770 // Bitcast back to the right container type.
10771 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10772
10773 if (AlignedIdx)
10774 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10775 if (!VecVT.isFixedLengthVector())
10776 return ValInVec;
10777 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10778 }
10779
10780 // First slide in the lo value, then the hi in above it. We use slide1down
10781 // to avoid the register group overlap constraint of vslide1up.
10782 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10783 DAG.getUNDEF(I32ContainerVT),
10784 DAG.getUNDEF(I32ContainerVT), ValLo,
10785 I32Mask, InsertI64VL);
10786 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10787 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10788 I32Mask, InsertI64VL);
10789 // Bitcast back to the right container type.
10790 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10791 }
10792
10793 // Now that the value is in a vector, slide it into position.
10794 SDValue InsertVL =
10795 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10796
10797 // Use tail agnostic policy if Idx is the last index of Vec.
10799 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10800 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10802 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10803 Idx, Mask, InsertVL, Policy);
10804
10805 if (AlignedIdx)
10806 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10807 if (!VecVT.isFixedLengthVector())
10808 return Slideup;
10809 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10810}
10811
10812// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10813// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10814// types this is done using VMV_X_S to allow us to glean information about the
10815// sign bits of the result.
10816SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10817                                                     SelectionDAG &DAG) const {
10818  SDLoc DL(Op);
10819  SDValue Idx = Op.getOperand(1);
10820  SDValue Vec = Op.getOperand(0);
10821  EVT EltVT = Op.getValueType();
10822  MVT VecVT = Vec.getSimpleValueType();
10823  MVT XLenVT = Subtarget.getXLenVT();
10824
  // i1 (mask) vectors get bespoke handling: there is no direct element
  // extract from a mask register, so we either use vfirst.m (index 0) or
  // bitcast to wider integer elements and pick the bit out in a GPR.
10825  if (VecVT.getVectorElementType() == MVT::i1) {
10826    // Use vfirst.m to extract the first bit.
10827    if (isNullConstant(Idx)) {
10828      MVT ContainerVT = VecVT;
10829      if (VecVT.isFixedLengthVector()) {
10830        ContainerVT = getContainerForFixedLengthVector(VecVT);
10831        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10832      }
10833      auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10834      SDValue Vfirst =
10835          DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
      // vfirst.m yields 0 exactly when element 0 is set, so bit 0 of the
      // result is (vfirst == 0).
10836      SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10837                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10838      return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10839    }
10840    if (VecVT.isFixedLengthVector()) {
10841      unsigned NumElts = VecVT.getVectorNumElements();
10842      if (NumElts >= 8) {
10843        MVT WideEltVT;
10844        unsigned WidenVecLen;
10845        SDValue ExtractElementIdx;
10846        SDValue ExtractBitIdx;
        // Widest usable integer element: bounded by both ELEN and XLEN.
10847        unsigned MaxEEW = Subtarget.getELen();
10848        MVT LargestEltVT = MVT::getIntegerVT(
10849            std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10850        if (NumElts <= LargestEltVT.getSizeInBits()) {
10851          assert(isPowerOf2_32(NumElts) &&
10852                 "the number of elements should be power of 2");
          // The whole mask fits in one wide element; the bit index is Idx.
10853          WideEltVT = MVT::getIntegerVT(NumElts);
10854          WidenVecLen = 1;
10855          ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10856          ExtractBitIdx = Idx;
10857        } else {
10858          WideEltVT = LargestEltVT;
10859          WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10860          // extract element index = index / element width
10861          ExtractElementIdx = DAG.getNode(
10862              ISD::SRL, DL, XLenVT, Idx,
10863              DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10864          // mask bit index = index % element width
10865          ExtractBitIdx = DAG.getNode(
10866              ISD::AND, DL, XLenVT, Idx,
10867              DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10868        }
10869        MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10870        Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10871        SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10872                                         Vec, ExtractElementIdx);
10873        // Extract the bit from GPR.
10874        SDValue ShiftRight =
10875            DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10876        SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10877                                  DAG.getConstant(1, DL, XLenVT));
10878        return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10879      }
10880    }
10881    // Otherwise, promote to an i8 vector and extract from that.
10882    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10883    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10884    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10885  }
10886
10887  if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10888      (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
10889    // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10890    MVT IntVT = VecVT.changeTypeToInteger();
10891    SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10892    SDValue IntExtract =
10893        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10894    return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10895  }
10896
  // P-extension SIMD: the small fixed vector lives in a single GPR, so the
  // element is obtained by shifting the bitcast scalar right by
  // Idx * element-width (the implicit truncate keeps only EltVT bits).
10897  if (Subtarget.enablePExtSIMDCodeGen() && VecVT.isFixedLengthVector()) {
10898    if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
10899        VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
10900      return SDValue();
10901    SDValue Extracted = DAG.getBitcast(XLenVT, Vec);
10902    unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits();
10903    SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx,
10904                                DAG.getConstant(ElemWidth, DL, XLenVT));
10905    return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt);
10906  }
10907
10908  // If this is a fixed vector, we need to convert it to a scalable vector.
10909  MVT ContainerVT = VecVT;
10910  if (VecVT.isFixedLengthVector()) {
10911    ContainerVT = getContainerForFixedLengthVector(VecVT);
10912    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10913  }
10914
10915  // If we're compiling for an exact VLEN value and we have a known
10916  // constant index, we can always perform the extract in m1 (or
10917  // smaller) as we can determine the register corresponding to
10918  // the index in the register group.
10919  const auto VLen = Subtarget.getRealVLen();
10920  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10921      IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10922    MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10923    unsigned OrigIdx = IdxC->getZExtValue();
10924    EVT ElemVT = VecVT.getVectorElementType();
10925    unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10926    unsigned RemIdx = OrigIdx % ElemsPerVReg;
10927    unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10928    unsigned ExtractIdx =
10929        SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10930    Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10931    Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10932    ContainerVT = M1VT;
10933  }
10934
10935  // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10936  // contains our index.
10937  std::optional<uint64_t> MaxIdx;
10938  if (VecVT.isFixedLengthVector())
10939    MaxIdx = VecVT.getVectorNumElements() - 1;
10940  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10941    MaxIdx = IdxC->getZExtValue();
10942  if (MaxIdx) {
10943    if (auto SmallerVT =
10944            getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10945      ContainerVT = *SmallerVT;
10946      Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10947    }
10948  }
10949
10950  // Use ri.vextract.x.v if available.
10951  // TODO: Avoid index 0 and just use the vmv.x.s
  // NOTE(review): the remainder of this condition (line 10953) was lost in
  // this excerpt; presumably it validates the index for RI_VEXTRACT.
10952  if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10954    SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10955    return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10956  }
10957
10958  // If after narrowing, the required slide is still greater than LMUL2,
10959  // fallback to generic expansion and go through the stack. This is done
10960  // for a subtle reason: extracting *all* elements out of a vector is
10961  // widely expected to be linear in vector size, but because vslidedown
10962  // is linear in LMUL, performing N extracts using vslidedown becomes
10963  // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10964  // seems to have the same problem (the store is linear in LMUL), but the
10965  // generic expansion *memoizes* the store, and thus for many extracts of
10966  // the same vector we end up with one store and a bunch of loads.
10967  // TODO: We don't have the same code for insert_vector_elt because we
10968  // have BUILD_VECTOR and handle the degenerate case there. Should we
10969  // consider adding an inverse BUILD_VECTOR node?
10970  MVT LMUL2VT =
10972  if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10973    return SDValue();
10974
10975  // If the index is 0, the vector is already in the right position.
10976  if (!isNullConstant(Idx)) {
10977    // Use a VL of 1 to avoid processing more elements than we need.
10978    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10979    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10980                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10981  }
10982
10983  if (!EltVT.isInteger()) {
10984    // Floating-point extracts are handled in TableGen.
10985    return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10986  }
10987
  // Integer path: vmv.x.s moves element 0 into a GPR (sign-extending to
  // XLEN); truncate back to the requested element type.
10988  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10989  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10990}
10991
10992// Some RVV intrinsics may claim that they want an integer operand to be
10993// promoted or expanded.
// lowerVectorIntrinsicScalars: legalizes the scalar ("splat") operand of an
// RVV intrinsic whose type differs from XLenVT — promoting narrow scalars,
// or (on RV32 with SEW=64) splitting an i64 scalar into two i32 halves.
// NOTE(review): the first signature line was lost in this excerpt; the
// identity is established by the call in LowerINTRINSIC_WO_CHAIN below.
10995                                       const RISCVSubtarget &Subtarget) {
10996  assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10997          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10998          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10999         "Unexpected opcode");
11000
11001  if (!Subtarget.hasVInstructions())
11002    return SDValue();
11003
  // Chained intrinsics carry the intrinsic ID at operand 1 (operand 0 is the
  // chain); unchained ones carry it at operand 0.
11004  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
11005                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
11006  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
11007
11008  SDLoc DL(Op);
11009
11011      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
11012  if (!II || !II->hasScalarOperand())
11013    return SDValue();
11014
11015  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
11016  assert(SplatOp < Op.getNumOperands());
11017
11018  SmallVector<SDValue, 8> Operands(Op->ops());
11019  SDValue &ScalarOp = Operands[SplatOp];
11020  MVT OpVT = ScalarOp.getSimpleValueType();
11021  MVT XLenVT = Subtarget.getXLenVT();
11022
11023  // If this isn't a scalar, or its type is XLenVT we're done.
11024  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
11025    return SDValue();
11026
11027  // Simplest case is that the operand needs to be promoted to XLenVT.
11028  if (OpVT.bitsLT(XLenVT)) {
11029    // If the operand is a constant, sign extend to increase our chances
11030    // of being able to use a .vi instruction. ANY_EXTEND would become a
11031    // a zero extend and the simm5 check in isel would fail.
11032    // FIXME: Should we ignore the upper bits in isel instead?
11033    unsigned ExtOpc =
11035    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
11036    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11037  }
11038
11039  // Use the previous operand to get the vXi64 VT. The result might be a mask
11040  // VT for compares. Using the previous operand assumes that the previous
11041  // operand will never have a smaller element size than a scalar operand and
11042  // that a widening operation never uses SEW=64.
11043  // NOTE: If this fails the below assert, we can probably just find the
11044  // element count from any operand or result and use it to construct the VT.
11045  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
11046  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
11047
11048  // The more complex case is when the scalar is larger than XLenVT.
11049  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
11050         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
11051
11052  // If this is a sign-extended 32-bit value, we can truncate it and rely on the
11053  // instruction to sign-extend since SEW>XLEN.
11054  if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
11055    ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
11056    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11057  }
11058
11059  switch (IntNo) {
11060  case Intrinsic::riscv_vslide1up:
11061  case Intrinsic::riscv_vslide1down:
11062  case Intrinsic::riscv_vslide1up_mask:
11063  case Intrinsic::riscv_vslide1down_mask: {
11064    // We need to special case these when the scalar is larger than XLen.
11065    unsigned NumOps = Op.getNumOperands();
11066    bool IsMasked = NumOps == 7;
11067
11068    // Convert the vector source to the equivalent nxvXi32 vector.
11069    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
11070    SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
11071    SDValue ScalarLo, ScalarHi;
11072    std::tie(ScalarLo, ScalarHi) =
11073        DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
11074
11075    // Double the VL since we halved SEW.
11076    SDValue AVL = getVLOperand(Op);
11077    SDValue I32VL;
11078
11079    // Optimize for constant AVL
11080    if (isa<ConstantSDNode>(AVL)) {
11081      const auto [MinVLMAX, MaxVLMAX] =
11083
11084      uint64_t AVLInt = AVL->getAsZExtVal();
11085      if (AVLInt <= MinVLMAX) {
        // AVL is guaranteed in range on all implementations: 2*AVL is exact.
11086        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
11087      } else if (AVLInt >= 2 * MaxVLMAX) {
11088        // Just set vl to VLMAX in this situation
11089        I32VL = DAG.getRegister(RISCV::X0, XLenVT);
11090      } else {
11091        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
11092        // is related to the hardware implementation.
11093        // So let the following code handle
11094      }
11095    }
11096    if (!I32VL) {
11098      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
11099      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
11100      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
11101      SDValue SETVL =
11102          DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
11103      // Using vsetvli instruction to get actually used length which related to
11104      // the hardware implementation
11105      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
11106                               SEW, LMUL);
11107      I32VL =
11108          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
11109    }
11110
11111    SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
11112
11113    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
11114    // instructions.
11115    SDValue Passthru;
11116    if (IsMasked)
11117      Passthru = DAG.getUNDEF(I32VT);
11118    else
11119      Passthru = DAG.getBitcast(I32VT, Operands[1]);
11120
11121    if (IntNo == Intrinsic::riscv_vslide1up ||
11122        IntNo == Intrinsic::riscv_vslide1up_mask) {
      // slide1up: insert hi first so lo ends up in the lower half.
11123      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
11124                        ScalarHi, I32Mask, I32VL);
11125      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
11126                        ScalarLo, I32Mask, I32VL);
11127    } else {
      // slide1down: insert lo first so hi ends up above it.
11128      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
11129                        ScalarLo, I32Mask, I32VL);
11130      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
11131                        ScalarHi, I32Mask, I32VL);
11132    }
11133
11134    // Convert back to nxvXi64.
11135    Vec = DAG.getBitcast(VT, Vec);
11136
11137    if (!IsMasked)
11138      return Vec;
11139    // Apply mask after the operation.
11140    SDValue Mask = Operands[NumOps - 3];
11141    SDValue MaskedOff = Operands[1];
11142    // Assume Policy operand is the last operand.
11143    uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
11144    // We don't need to select maskedoff if it's undef.
11145    if (MaskedOff.isUndef())
11146      return Vec;
11147    // TAMU
11148    if (Policy == RISCVVType::TAIL_AGNOSTIC)
11149      return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
11150                         DAG.getUNDEF(VT), AVL);
11151    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
11152    // It's fine because vmerge does not care mask policy.
11153    return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
11154                       MaskedOff, AVL);
11155  }
11156  }
11157
11158  // We need to convert the scalar to a splat vector.
11159  SDValue VL = getVLOperand(Op);
11160  assert(VL.getValueType() == XLenVT);
11161  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
11162  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11163}
11164
11165// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
11166// scalable vector llvm.get.vector.length for now.
11167//
11168// We need to convert from a scalable VF to a vsetvli with VLMax equal to
11169// (vscale * VF). The vscale and VF are independent of element width. We use
11170// SEW=8 for the vsetvli because it is the only element width that supports all
11171// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
11172// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
11173// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
11174// SEW and LMUL are better for the surrounding vector instructions.
// NOTE(review): the first signature line was lost in this excerpt; the
// identity is established by the experimental_get_vector_length case below.
11176                                     const RISCVSubtarget &Subtarget) {
11177  MVT XLenVT = Subtarget.getXLenVT();
11178
11179  // The smallest LMUL is only valid for the smallest element width.
11180  const unsigned ElementWidth = 8;
11181
11182  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
11183  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
11184  // We don't support VF==1 with ELEN==32.
11185  [[maybe_unused]] unsigned MinVF =
11186      RISCV::RVVBitsPerBlock / Subtarget.getELen();
11187
11188  [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
11189  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
11190         "Unexpected VF");
11191
  // Encode the (possibly fractional) LMUL so that SEW=8 yields VLMax ==
  // vscale * VF.
11192  bool Fractional = VF < LMul1VF;
11193  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
11194  unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
11195  unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
11196
11197  SDLoc DL(N);
11198
11199  SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
11200  SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
11201
11202  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
11203
11204  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
11205  SDValue Res =
11206      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
11207  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
11208}
11209
// Lower llvm.experimental.cttz.elts to vfirst.m on the mask operand.
// vfirst returns the index of the first set bit, or -1 if none are set; when
// the intrinsic is not "zero is poison" (operand 2 != 1), -1 is mapped to the
// element count. NOTE(review): the first signature line was lost in this
// excerpt; the identity is established by the experimental_cttz_elts case
// below.
11211                             const RISCVSubtarget &Subtarget) {
11212  SDValue Op0 = N->getOperand(1);
11213  MVT OpVT = Op0.getSimpleValueType();
11214  MVT ContainerVT = OpVT;
11215  if (OpVT.isFixedLengthVector()) {
11216    ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
11217    Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
11218  }
11219  MVT XLenVT = Subtarget.getXLenVT();
11220  SDLoc DL(N);
11221  auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
11222  SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
  // Operand 2 == 1 means "zero is poison": -1 need not be canonicalized.
11223  if (isOneConstant(N->getOperand(2)))
11224    return Res;
11225
11226  // Convert -1 to VL.
11227  SDValue Setcc =
11228      DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
11229  VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
11230  return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
11231}
11232
// Promote an undersized scalar operand of a VCIX (vendor custom instruction
// extension) intrinsic to XLenVT in place, mirroring the promotion logic of
// lowerVectorIntrinsicScalars. Operands wider than XLenVT are left untouched.
11233static inline void promoteVCIXScalar(SDValue Op,
11234                                     MutableArrayRef<SDValue> Operands,
11235                                     SelectionDAG &DAG) {
11236  const RISCVSubtarget &Subtarget =
11238
11239  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
11240                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
11241  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
11242  SDLoc DL(Op);
11243
11245      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
11246  if (!II || !II->hasScalarOperand())
11247    return;
11248
  // Note: unlike lowerVectorIntrinsicScalars, HasChain is not added here —
  // Operands has already had the chain-relative offset applied by callers.
11249  unsigned SplatOp = II->ScalarOperand + 1;
11250  assert(SplatOp < Op.getNumOperands());
11251
11252  SDValue &ScalarOp = Operands[SplatOp];
11253  MVT OpVT = ScalarOp.getSimpleValueType();
11254  MVT XLenVT = Subtarget.getXLenVT();
11255
11256  // The code below is partially copied from lowerVectorIntrinsicScalars.
11257  // If this isn't a scalar, or its type is XLenVT we're done.
11258  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
11259    return;
11260
11261  // Manually emit promote operation for scalar operation.
11262  if (OpVT.bitsLT(XLenVT)) {
11263    unsigned ExtOpc =
11265    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
11266  }
11267}
11268
// Normalize the operand list of a VCIX intrinsic: promote any narrow scalar
// operand, bitcast floating-point vector operands to same-width integer
// vectors, and convert fixed-length vector operands to their scalable
// container types. Operands are rewritten in place.
11269static void processVCIXOperands(SDValue OrigOp,
11270                                MutableArrayRef<SDValue> Operands,
11271                                SelectionDAG &DAG) {
11272  promoteVCIXScalar(OrigOp, Operands, DAG);
11273  const RISCVSubtarget &Subtarget =
11275  for (SDValue &V : Operands) {
11276    EVT ValType = V.getValueType();
11277    if (ValType.isVector() && ValType.isFloatingPoint()) {
      // VCIX operates on integer registers; reinterpret FP lanes as ints.
11278      MVT InterimIVT =
11279          MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
11280                           ValType.getVectorElementCount());
11281      V = DAG.getBitcast(InterimIVT, V);
11282    }
11283    if (ValType.isFixedLengthVector()) {
11284      MVT OpContainerVT = getContainerForFixedLengthVector(
11285          DAG, V.getSimpleValueType(), Subtarget);
11286      V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
11287    }
11288  }
11289}
11290
11291// LMUL * VLEN should be greater than or equal to EGS * SEW
// Returns true when the element-group width constraint of the vector-crypto
// instructions holds for the given element-group size (EGS) and type.
// NOTE(review): the middle line of the expression (11295) was lost in this
// excerpt; it presumably scales by the type's minimum size over
// RVVBitsPerBlock.
11292static inline bool isValidEGW(int EGS, EVT VT,
11293                              const RISCVSubtarget &Subtarget) {
11294  return (Subtarget.getRealMinVLen() *
11296         EGS * VT.getScalarSizeInBits();
11297}
11298
// Custom lowering for chainless RISC-V intrinsics: scalar crypto/bitmanip ops
// map directly to target nodes; vector intrinsics are validated (vector
// crypto EGW checks) or rewritten (VCIX, splat-scalar legalization).
11299SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11300                                                     SelectionDAG &DAG) const {
11301  unsigned IntNo = Op.getConstantOperandVal(0);
11302  SDLoc DL(Op);
11303  MVT XLenVT = Subtarget.getXLenVT();
11304
11305  switch (IntNo) {
11306  default:
11307    break; // Don't custom lower most intrinsics.
11308  case Intrinsic::riscv_tuple_insert: {
11309    SDValue Vec = Op.getOperand(1);
11310    SDValue SubVec = Op.getOperand(2);
11311    SDValue Index = Op.getOperand(3);
11312
11313    return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
11314                       SubVec, Index);
11315  }
11316  case Intrinsic::riscv_tuple_extract: {
11317    SDValue Vec = Op.getOperand(1);
11318    SDValue Index = Op.getOperand(2);
11319
11320    return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
11321                       Index);
11322  }
11323  case Intrinsic::thread_pointer: {
    // The RISC-V thread pointer is the tp register (x4).
11324    EVT PtrVT = getPointerTy(DAG.getDataLayout());
11325    return DAG.getRegister(RISCV::X4, PtrVT);
11326  }
11327  case Intrinsic::riscv_orc_b:
11328  case Intrinsic::riscv_brev8:
11329  case Intrinsic::riscv_sha256sig0:
11330  case Intrinsic::riscv_sha256sig1:
11331  case Intrinsic::riscv_sha256sum0:
11332  case Intrinsic::riscv_sha256sum1:
11333  case Intrinsic::riscv_sm3p0:
11334  case Intrinsic::riscv_sm3p1: {
    // Unary scalar bitmanip/crypto ops: 1:1 map to a target node.
11335    unsigned Opc;
11336    switch (IntNo) {
11337    case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
11338    case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
11339    case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
11340    case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
11341    case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
11342    case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
11343    case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
11344    case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
11345    }
11346
11347    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
11348  }
11349  case Intrinsic::riscv_sm4ks:
11350  case Intrinsic::riscv_sm4ed: {
11351    unsigned Opc =
11352        IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
11353
11354    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
11355                       Op.getOperand(3));
11356  }
11357  case Intrinsic::riscv_zip:
11358  case Intrinsic::riscv_unzip: {
11359    unsigned Opc =
11360        IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
11361    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
11362  }
11363  case Intrinsic::riscv_mopr:
11364    return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
11365                       Op.getOperand(2));
11366
11367  case Intrinsic::riscv_moprr: {
11368    return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
11369                       Op.getOperand(2), Op.getOperand(3));
11370  }
11371  case Intrinsic::riscv_clmul:
11372    return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
11373                       Op.getOperand(2));
11374  case Intrinsic::riscv_clmulh:
11375  case Intrinsic::riscv_clmulr: {
11376    unsigned Opc =
11377        IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
11378    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
11379  }
11380  case Intrinsic::experimental_get_vector_length:
11381    return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
11382  case Intrinsic::experimental_cttz_elts:
11383    return lowerCttzElts(Op.getNode(), DAG, Subtarget);
11384  case Intrinsic::riscv_vmv_x_s: {
11385    SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
11386    return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
11387  }
11388  case Intrinsic::riscv_vfmv_f_s:
11389    return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
11390  case Intrinsic::riscv_vmv_v_x:
11391    return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
11392                            Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
11393                            Subtarget);
11394  case Intrinsic::riscv_vfmv_v_f:
11395    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
11396                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11397  case Intrinsic::riscv_vmv_s_x: {
11398    SDValue Scalar = Op.getOperand(2);
11399
11400    if (Scalar.getValueType().bitsLE(XLenVT)) {
11401      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
11402      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
11403                         Op.getOperand(1), Scalar, Op.getOperand(3));
11404    }
11405
11406    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
11407
11408    // This is an i64 value that lives in two scalar registers. We have to
11409    // insert this in a convoluted way. First we build vXi64 splat containing
11410    // the two values that we assemble using some bit math. Next we'll use
11411    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
11412    // to merge element 0 from our splat into the source vector.
11413    // FIXME: This is probably not the best way to do this, but it is
11414    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
11415    // point.
11416    //   sw lo, (a0)
11417    //   sw hi, 4(a0)
11418    //   vlse vX, (a0)
11419    //
11420    //   vid.v      vVid
11421    //   vmseq.vx   mMask, vVid, 0
11422    //   vmerge.vvm vDest, vSrc, vVal, mMask
11423    MVT VT = Op.getSimpleValueType();
11424    SDValue Vec = Op.getOperand(1);
11425    SDValue VL = getVLOperand(Op);
11426
11427    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
    // If the passthru is undef, merging is unnecessary; the splat suffices.
11428    if (Op.getOperand(1).isUndef())
11429      return SplattedVal;
11430    SDValue SplattedIdx =
11431        DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11432                    DAG.getConstant(0, DL, MVT::i32), VL);
11433
11434    MVT MaskVT = getMaskTypeFor(VT);
11435    SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
11436    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11437    SDValue SelectCond =
11438        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11439                    {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
11440                     DAG.getUNDEF(MaskVT), Mask, VL});
11441    return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
11442                       Vec, DAG.getUNDEF(VT), VL);
11443  }
11444  case Intrinsic::riscv_vfmv_s_f:
11445    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
11446                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11447  // EGS * EEW >= 128 bits
11448  case Intrinsic::riscv_vaesdf_vv:
11449  case Intrinsic::riscv_vaesdf_vs:
11450  case Intrinsic::riscv_vaesdm_vv:
11451  case Intrinsic::riscv_vaesdm_vs:
11452  case Intrinsic::riscv_vaesef_vv:
11453  case Intrinsic::riscv_vaesef_vs:
11454  case Intrinsic::riscv_vaesem_vv:
11455  case Intrinsic::riscv_vaesem_vs:
11456  case Intrinsic::riscv_vaeskf1:
11457  case Intrinsic::riscv_vaeskf2:
11458  case Intrinsic::riscv_vaesz_vs:
11459  case Intrinsic::riscv_vsm4k:
11460  case Intrinsic::riscv_vsm4r_vv:
11461  case Intrinsic::riscv_vsm4r_vs: {
    // Vector crypto with 4-element groups: validate EGW, otherwise legal.
11462    if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11463        !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11464        !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11465      reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11466    return Op;
11467  }
11468  // EGS * EEW >= 256 bits
11469  case Intrinsic::riscv_vsm3c:
11470  case Intrinsic::riscv_vsm3me: {
11471    if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
11472        !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
11473      reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
11474    return Op;
11475  }
11476  // zvknha(SEW=32)/zvknhb(SEW=[32|64])
11477  case Intrinsic::riscv_vsha2ch:
11478  case Intrinsic::riscv_vsha2cl:
11479  case Intrinsic::riscv_vsha2ms: {
11480    if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
11481        !Subtarget.hasStdExtZvknhb())
11482      reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
11483    if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11484        !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11485        !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11486      reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11487    return Op;
11488  }
11489  case Intrinsic::riscv_sf_vc_v_x:
11490  case Intrinsic::riscv_sf_vc_v_i:
11491  case Intrinsic::riscv_sf_vc_v_xv:
11492  case Intrinsic::riscv_sf_vc_v_iv:
11493  case Intrinsic::riscv_sf_vc_v_vv:
11494  case Intrinsic::riscv_sf_vc_v_fv:
11495  case Intrinsic::riscv_sf_vc_v_xvv:
11496  case Intrinsic::riscv_sf_vc_v_ivv:
11497  case Intrinsic::riscv_sf_vc_v_vvv:
11498  case Intrinsic::riscv_sf_vc_v_fvv:
11499  case Intrinsic::riscv_sf_vc_v_xvw:
11500  case Intrinsic::riscv_sf_vc_v_ivw:
11501  case Intrinsic::riscv_sf_vc_v_vvw:
11502  case Intrinsic::riscv_sf_vc_v_fvw: {
    // SiFive VCIX: legalize operands (scalable containers, integer bitcasts),
    // re-emit the intrinsic, then convert the result back to the original VT.
11503    MVT VT = Op.getSimpleValueType();
11504
11505    SmallVector<SDValue> Operands{Op->op_values()};
11506    processVCIXOperands(Op, Operands, DAG);
11507
11508    MVT RetVT = VT;
11509    if (VT.isFixedLengthVector())
11511    else if (VT.isFloatingPoint())
11514
11515    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11516
11517    if (VT.isFixedLengthVector())
11518      NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11519    else if (VT.isFloatingPoint())
11520      NewNode = DAG.getBitcast(VT, NewNode);
11521
    // If nothing changed, break so we don't recurse via relegalization.
11522    if (Op == NewNode)
11523      break;
11524
11525    return NewNode;
11526  }
11527  }
11528
  // Fall through: handle oversized/undersized scalar ("splat") operands.
11529  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11530}
11531
// Build the chained target node for a VCIX intrinsic: drops the intrinsic-ID
// operand, legalizes the remaining operands, emits a {result, chain} node of
// the given target opcode 'Type', and converts the result back to the
// original (possibly fixed-length / floating-point) value type.
// NOTE(review): the first signature line was lost in this excerpt.
11533                                    unsigned Type) {
11534  SDLoc DL(Op);
11535  SmallVector<SDValue> Operands{Op->op_values()};
  // Drop the intrinsic ID (operand 1; operand 0 is the chain).
11536  Operands.erase(Operands.begin() + 1);
11537
11538  const RISCVSubtarget &Subtarget =
11540  MVT VT = Op.getSimpleValueType();
11541  MVT RetVT = VT;
11542  MVT FloatVT = VT;
11543
11544  if (VT.isFloatingPoint()) {
    // Compute in an equal-width integer vector; bitcast back at the end.
11545    RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11546                             VT.getVectorElementCount());
11547    FloatVT = RetVT;
11548  }
11549  if (VT.isFixedLengthVector())
11551                                               Subtarget);
11552
11553  processVCIXOperands(Op, Operands, DAG);
11554
11555  SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11556  SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11557  SDValue Chain = NewNode.getValue(1);
11558
11559  if (VT.isFixedLengthVector())
11560    NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11561  if (VT.isFloatingPoint())
11562    NewNode = DAG.getBitcast(VT, NewNode);
11563
  // Re-attach the chain to the converted value.
11564  NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11565
11566  return NewNode;
11567}
11568
// Build the void (no-result) target node for a VCIX intrinsic: drop the
// intrinsic-ID operand, legalize the remaining operands, and emit the given
// target opcode 'Type'. NOTE(review): the first signature line was lost in
// this excerpt.
11570                                  unsigned Type) {
11571  SmallVector<SDValue> Operands{Op->op_values()};
  // Drop the intrinsic ID (operand 1; operand 0 is the chain).
11572  Operands.erase(Operands.begin() + 1);
11573  processVCIXOperands(Op, Operands, DAG);
11574
11575  return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11576}
11577
11578static SDValue
11580 const RISCVSubtarget &Subtarget,
11581 SelectionDAG &DAG) {
11582 bool IsStrided;
11583 switch (IntNo) {
11584 case Intrinsic::riscv_seg2_load_mask:
11585 case Intrinsic::riscv_seg3_load_mask:
11586 case Intrinsic::riscv_seg4_load_mask:
11587 case Intrinsic::riscv_seg5_load_mask:
11588 case Intrinsic::riscv_seg6_load_mask:
11589 case Intrinsic::riscv_seg7_load_mask:
11590 case Intrinsic::riscv_seg8_load_mask:
11591 IsStrided = false;
11592 break;
11593 case Intrinsic::riscv_sseg2_load_mask:
11594 case Intrinsic::riscv_sseg3_load_mask:
11595 case Intrinsic::riscv_sseg4_load_mask:
11596 case Intrinsic::riscv_sseg5_load_mask:
11597 case Intrinsic::riscv_sseg6_load_mask:
11598 case Intrinsic::riscv_sseg7_load_mask:
11599 case Intrinsic::riscv_sseg8_load_mask:
11600 IsStrided = true;
11601 break;
11602 default:
11603 llvm_unreachable("unexpected intrinsic ID");
11604 };
11605
11606 static const Intrinsic::ID VlsegInts[7] = {
11607 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11608 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11609 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11610 Intrinsic::riscv_vlseg8_mask};
11611 static const Intrinsic::ID VlssegInts[7] = {
11612 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11613 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11614 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11615 Intrinsic::riscv_vlsseg8_mask};
11616
11617 SDLoc DL(Op);
11618 unsigned NF = Op->getNumValues() - 1;
11619 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11620 MVT XLenVT = Subtarget.getXLenVT();
11621 MVT VT = Op->getSimpleValueType(0);
11622 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11623 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11624 ContainerVT.getScalarSizeInBits();
11625 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11626
11627 // Operands: (chain, int_id, pointer, mask, vl) or
11628 // (chain, int_id, pointer, offset, mask, vl)
11629 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11630 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11631 MVT MaskVT = Mask.getSimpleValueType();
11632 MVT MaskContainerVT =
11633 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11634 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11635
11636 SDValue IntID = DAG.getTargetConstant(
11637 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11638 auto *Load = cast<MemIntrinsicSDNode>(Op);
11639
11640 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11642 Load->getChain(),
11643 IntID,
11644 DAG.getUNDEF(VecTupTy),
11645 Op.getOperand(2),
11646 Mask,
11647 VL,
11650 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11651 // Insert the stride operand.
11652 if (IsStrided)
11653 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11654
11655 SDValue Result =
11657 Load->getMemoryVT(), Load->getMemOperand());
11659 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11660 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11661 Result.getValue(0),
11662 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11663 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11664 }
11665 Results.push_back(Result.getValue(1));
11666 return DAG.getMergeValues(Results, DL);
11667}
11668
11669SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11670 SelectionDAG &DAG) const {
11671 unsigned IntNo = Op.getConstantOperandVal(1);
11672 switch (IntNo) {
11673 default:
11674 break;
11675 case Intrinsic::riscv_seg2_load_mask:
11676 case Intrinsic::riscv_seg3_load_mask:
11677 case Intrinsic::riscv_seg4_load_mask:
11678 case Intrinsic::riscv_seg5_load_mask:
11679 case Intrinsic::riscv_seg6_load_mask:
11680 case Intrinsic::riscv_seg7_load_mask:
11681 case Intrinsic::riscv_seg8_load_mask:
11682 case Intrinsic::riscv_sseg2_load_mask:
11683 case Intrinsic::riscv_sseg3_load_mask:
11684 case Intrinsic::riscv_sseg4_load_mask:
11685 case Intrinsic::riscv_sseg5_load_mask:
11686 case Intrinsic::riscv_sseg6_load_mask:
11687 case Intrinsic::riscv_sseg7_load_mask:
11688 case Intrinsic::riscv_sseg8_load_mask:
11689 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11690
11691 case Intrinsic::riscv_sf_vc_v_x_se:
11692 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11693 case Intrinsic::riscv_sf_vc_v_i_se:
11694 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11695 case Intrinsic::riscv_sf_vc_v_xv_se:
11696 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11697 case Intrinsic::riscv_sf_vc_v_iv_se:
11698 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11699 case Intrinsic::riscv_sf_vc_v_vv_se:
11700 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11701 case Intrinsic::riscv_sf_vc_v_fv_se:
11702 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11703 case Intrinsic::riscv_sf_vc_v_xvv_se:
11704 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11705 case Intrinsic::riscv_sf_vc_v_ivv_se:
11706 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11707 case Intrinsic::riscv_sf_vc_v_vvv_se:
11708 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11709 case Intrinsic::riscv_sf_vc_v_fvv_se:
11710 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11711 case Intrinsic::riscv_sf_vc_v_xvw_se:
11712 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11713 case Intrinsic::riscv_sf_vc_v_ivw_se:
11714 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11715 case Intrinsic::riscv_sf_vc_v_vvw_se:
11716 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11717 case Intrinsic::riscv_sf_vc_v_fvw_se:
11718 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11719 }
11720
11721 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11722}
11723
11724static SDValue
11726 const RISCVSubtarget &Subtarget,
11727 SelectionDAG &DAG) {
11728 bool IsStrided;
11729 switch (IntNo) {
11730 case Intrinsic::riscv_seg2_store_mask:
11731 case Intrinsic::riscv_seg3_store_mask:
11732 case Intrinsic::riscv_seg4_store_mask:
11733 case Intrinsic::riscv_seg5_store_mask:
11734 case Intrinsic::riscv_seg6_store_mask:
11735 case Intrinsic::riscv_seg7_store_mask:
11736 case Intrinsic::riscv_seg8_store_mask:
11737 IsStrided = false;
11738 break;
11739 case Intrinsic::riscv_sseg2_store_mask:
11740 case Intrinsic::riscv_sseg3_store_mask:
11741 case Intrinsic::riscv_sseg4_store_mask:
11742 case Intrinsic::riscv_sseg5_store_mask:
11743 case Intrinsic::riscv_sseg6_store_mask:
11744 case Intrinsic::riscv_sseg7_store_mask:
11745 case Intrinsic::riscv_sseg8_store_mask:
11746 IsStrided = true;
11747 break;
11748 default:
11749 llvm_unreachable("unexpected intrinsic ID");
11750 }
11751
11752 SDLoc DL(Op);
11753 static const Intrinsic::ID VssegInts[] = {
11754 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11755 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11756 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11757 Intrinsic::riscv_vsseg8_mask};
11758 static const Intrinsic::ID VsssegInts[] = {
11759 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11760 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11761 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11762 Intrinsic::riscv_vssseg8_mask};
11763
11764 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11765 // (chain, int_id, vec*, ptr, stride, mask, vl)
11766 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11767 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11768 MVT XLenVT = Subtarget.getXLenVT();
11769 MVT VT = Op->getOperand(2).getSimpleValueType();
11770 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11771 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11772 ContainerVT.getScalarSizeInBits();
11773 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11774
11775 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11776 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11777 MVT MaskVT = Mask.getSimpleValueType();
11778 MVT MaskContainerVT =
11779 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11780 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11781
11782 SDValue IntID = DAG.getTargetConstant(
11783 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11784 SDValue Ptr = Op->getOperand(NF + 2);
11785
11786 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11787
11788 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11789 for (unsigned i = 0; i < NF; i++)
11790 StoredVal = DAG.getNode(
11791 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11792 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11793 DAG, Subtarget),
11794 DAG.getTargetConstant(i, DL, MVT::i32));
11795
11797 FixedIntrinsic->getChain(),
11798 IntID,
11799 StoredVal,
11800 Ptr,
11801 Mask,
11802 VL,
11803 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11804 // Insert the stride operand.
11805 if (IsStrided)
11806 Ops.insert(std::next(Ops.begin(), 4),
11807 Op.getOperand(Op.getNumOperands() - 3));
11808
11809 return DAG.getMemIntrinsicNode(
11810 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11811 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11812}
11813
11814SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11815 SelectionDAG &DAG) const {
11816 unsigned IntNo = Op.getConstantOperandVal(1);
11817 switch (IntNo) {
11818 default:
11819 break;
11820 case Intrinsic::riscv_seg2_store_mask:
11821 case Intrinsic::riscv_seg3_store_mask:
11822 case Intrinsic::riscv_seg4_store_mask:
11823 case Intrinsic::riscv_seg5_store_mask:
11824 case Intrinsic::riscv_seg6_store_mask:
11825 case Intrinsic::riscv_seg7_store_mask:
11826 case Intrinsic::riscv_seg8_store_mask:
11827 case Intrinsic::riscv_sseg2_store_mask:
11828 case Intrinsic::riscv_sseg3_store_mask:
11829 case Intrinsic::riscv_sseg4_store_mask:
11830 case Intrinsic::riscv_sseg5_store_mask:
11831 case Intrinsic::riscv_sseg6_store_mask:
11832 case Intrinsic::riscv_sseg7_store_mask:
11833 case Intrinsic::riscv_sseg8_store_mask:
11834 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11835
11836 case Intrinsic::riscv_sf_vc_xv_se:
11837 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11838 case Intrinsic::riscv_sf_vc_iv_se:
11839 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11840 case Intrinsic::riscv_sf_vc_vv_se:
11841 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11842 case Intrinsic::riscv_sf_vc_fv_se:
11843 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11844 case Intrinsic::riscv_sf_vc_xvv_se:
11845 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11846 case Intrinsic::riscv_sf_vc_ivv_se:
11847 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11848 case Intrinsic::riscv_sf_vc_vvv_se:
11849 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11850 case Intrinsic::riscv_sf_vc_fvv_se:
11851 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11852 case Intrinsic::riscv_sf_vc_xvw_se:
11853 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11854 case Intrinsic::riscv_sf_vc_ivw_se:
11855 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11856 case Intrinsic::riscv_sf_vc_vvw_se:
11857 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11858 case Intrinsic::riscv_sf_vc_fvw_se:
11859 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11860 }
11861
11862 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11863}
11864
11865static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11866 switch (ISDOpcode) {
11867 default:
11868 llvm_unreachable("Unhandled reduction");
11869 case ISD::VP_REDUCE_ADD:
11870 case ISD::VECREDUCE_ADD:
11871 return RISCVISD::VECREDUCE_ADD_VL;
11872 case ISD::VP_REDUCE_UMAX:
11874 return RISCVISD::VECREDUCE_UMAX_VL;
11875 case ISD::VP_REDUCE_SMAX:
11877 return RISCVISD::VECREDUCE_SMAX_VL;
11878 case ISD::VP_REDUCE_UMIN:
11880 return RISCVISD::VECREDUCE_UMIN_VL;
11881 case ISD::VP_REDUCE_SMIN:
11883 return RISCVISD::VECREDUCE_SMIN_VL;
11884 case ISD::VP_REDUCE_AND:
11885 case ISD::VECREDUCE_AND:
11886 return RISCVISD::VECREDUCE_AND_VL;
11887 case ISD::VP_REDUCE_OR:
11888 case ISD::VECREDUCE_OR:
11889 return RISCVISD::VECREDUCE_OR_VL;
11890 case ISD::VP_REDUCE_XOR:
11891 case ISD::VECREDUCE_XOR:
11892 return RISCVISD::VECREDUCE_XOR_VL;
11893 case ISD::VP_REDUCE_FADD:
11894 return RISCVISD::VECREDUCE_FADD_VL;
11895 case ISD::VP_REDUCE_SEQ_FADD:
11896 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11897 case ISD::VP_REDUCE_FMAX:
11898 case ISD::VP_REDUCE_FMAXIMUM:
11899 return RISCVISD::VECREDUCE_FMAX_VL;
11900 case ISD::VP_REDUCE_FMIN:
11901 case ISD::VP_REDUCE_FMINIMUM:
11902 return RISCVISD::VECREDUCE_FMIN_VL;
11903 }
11904
11905}
11906
11907SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11908 SelectionDAG &DAG,
11909 bool IsVP) const {
11910 SDLoc DL(Op);
11911 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11912 MVT VecVT = Vec.getSimpleValueType();
11913 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11914 Op.getOpcode() == ISD::VECREDUCE_OR ||
11915 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11916 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11917 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11918 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11919 "Unexpected reduction lowering");
11920
11921 MVT XLenVT = Subtarget.getXLenVT();
11922
11923 MVT ContainerVT = VecVT;
11924 if (VecVT.isFixedLengthVector()) {
11925 ContainerVT = getContainerForFixedLengthVector(VecVT);
11926 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11927 }
11928
11929 SDValue Mask, VL;
11930 if (IsVP) {
11931 Mask = Op.getOperand(2);
11932 VL = Op.getOperand(3);
11933 } else {
11934 std::tie(Mask, VL) =
11935 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11936 }
11937
11938 ISD::CondCode CC;
11939 switch (Op.getOpcode()) {
11940 default:
11941 llvm_unreachable("Unhandled reduction");
11942 case ISD::VECREDUCE_AND:
11943 case ISD::VP_REDUCE_AND: {
11944 // vcpop ~x == 0
11945 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11946 if (IsVP || VecVT.isFixedLengthVector())
11947 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11948 else
11949 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11950 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11951 CC = ISD::SETEQ;
11952 break;
11953 }
11954 case ISD::VECREDUCE_OR:
11955 case ISD::VP_REDUCE_OR:
11956 // vcpop x != 0
11957 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11958 CC = ISD::SETNE;
11959 break;
11960 case ISD::VECREDUCE_XOR:
11961 case ISD::VP_REDUCE_XOR: {
11962 // ((vcpop x) & 1) != 0
11963 SDValue One = DAG.getConstant(1, DL, XLenVT);
11964 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11965 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11966 CC = ISD::SETNE;
11967 break;
11968 }
11969 }
11970
11971 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11972 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11973 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11974
11975 if (!IsVP)
11976 return SetCC;
11977
11978 // Now include the start value in the operation.
11979 // Note that we must return the start value when no elements are operated
11980 // upon. The vcpop instructions we've emitted in each case above will return
11981 // 0 for an inactive vector, and so we've already received the neutral value:
11982 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11983 // can simply include the start value.
11984 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11985 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11986}
11987
11988static bool isNonZeroAVL(SDValue AVL) {
11989 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11990 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11991 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11992 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11993}
11994
11995/// Helper to lower a reduction sequence of the form:
11996/// scalar = reduce_op vec, scalar_start
11997static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11998 SDValue StartValue, SDValue Vec, SDValue Mask,
11999 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
12000 const RISCVSubtarget &Subtarget) {
12001 const MVT VecVT = Vec.getSimpleValueType();
12002 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
12003 const MVT XLenVT = Subtarget.getXLenVT();
12004 const bool NonZeroAVL = isNonZeroAVL(VL);
12005
12006 // The reduction needs an LMUL1 input; do the splat at either LMUL1
12007 // or the original VT if fractional.
12008 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
12009 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
12010 // prove it is non-zero. For the AVL=0 case, we need the scalar to
12011 // be the result of the reduction operation.
12012 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
12013 SDValue InitialValue =
12014 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
12015 if (M1VT != InnerVT)
12016 InitialValue =
12017 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
12018 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
12020 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
12021 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
12022 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
12023}
12024
12025SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
12026 SelectionDAG &DAG) const {
12027 SDLoc DL(Op);
12028 SDValue Vec = Op.getOperand(0);
12029 EVT VecEVT = Vec.getValueType();
12030
12031 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
12032
12033 // Due to ordering in legalize types we may have a vector type that needs to
12034 // be split. Do that manually so we can get down to a legal type.
12035 while (getTypeAction(*DAG.getContext(), VecEVT) ==
12037 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12038 VecEVT = Lo.getValueType();
12039 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
12040 }
12041
12042 // TODO: The type may need to be widened rather than split. Or widened before
12043 // it can be split.
12044 if (!isTypeLegal(VecEVT))
12045 return SDValue();
12046
12047 MVT VecVT = VecEVT.getSimpleVT();
12048 MVT VecEltVT = VecVT.getVectorElementType();
12049 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
12050
12051 MVT ContainerVT = VecVT;
12052 if (VecVT.isFixedLengthVector()) {
12053 ContainerVT = getContainerForFixedLengthVector(VecVT);
12054 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12055 }
12056
12057 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12058
12059 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
12060 switch (BaseOpc) {
12061 case ISD::AND:
12062 case ISD::OR:
12063 case ISD::UMAX:
12064 case ISD::UMIN:
12065 case ISD::SMAX:
12066 case ISD::SMIN:
12067 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
12068 }
12069 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
12070 Mask, VL, DL, DAG, Subtarget);
12071}
12072
12073// Given a reduction op, this function returns the matching reduction opcode,
12074// the vector SDValue and the scalar SDValue required to lower this to a
12075// RISCVISD node.
12076static std::tuple<unsigned, SDValue, SDValue>
12078 const RISCVSubtarget &Subtarget) {
12079 SDLoc DL(Op);
12080 auto Flags = Op->getFlags();
12081 unsigned Opcode = Op.getOpcode();
12082 switch (Opcode) {
12083 default:
12084 llvm_unreachable("Unhandled reduction");
12085 case ISD::VECREDUCE_FADD: {
12086 // Use positive zero if we can. It is cheaper to materialize.
12087 SDValue Zero =
12088 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
12089 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
12090 }
12092 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
12093 Op.getOperand(0));
12097 case ISD::VECREDUCE_FMAX: {
12098 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
12099 unsigned RVVOpc =
12100 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
12101 ? RISCVISD::VECREDUCE_FMIN_VL
12102 : RISCVISD::VECREDUCE_FMAX_VL;
12103 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
12104 }
12105 }
12106}
12107
12108SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
12109 SelectionDAG &DAG) const {
12110 SDLoc DL(Op);
12111 MVT VecEltVT = Op.getSimpleValueType();
12112
12113 unsigned RVVOpcode;
12114 SDValue VectorVal, ScalarVal;
12115 std::tie(RVVOpcode, VectorVal, ScalarVal) =
12116 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
12117 MVT VecVT = VectorVal.getSimpleValueType();
12118
12119 MVT ContainerVT = VecVT;
12120 if (VecVT.isFixedLengthVector()) {
12121 ContainerVT = getContainerForFixedLengthVector(VecVT);
12122 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
12123 }
12124
12125 MVT ResVT = Op.getSimpleValueType();
12126 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12127 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
12128 VL, DL, DAG, Subtarget);
12129 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
12130 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
12131 return Res;
12132
12133 if (Op->getFlags().hasNoNaNs())
12134 return Res;
12135
12136 // Force output to NaN if any element is Nan.
12137 SDValue IsNan =
12138 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
12139 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
12140 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
12141 MVT XLenVT = Subtarget.getXLenVT();
12142 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
12143 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
12144 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
12145 return DAG.getSelect(
12146 DL, ResVT, NoNaNs, Res,
12147 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
12148}
12149
12150SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
12151 SelectionDAG &DAG) const {
12152 SDLoc DL(Op);
12153 unsigned Opc = Op.getOpcode();
12154 SDValue Start = Op.getOperand(0);
12155 SDValue Vec = Op.getOperand(1);
12156 EVT VecEVT = Vec.getValueType();
12157 MVT XLenVT = Subtarget.getXLenVT();
12158
12159 // TODO: The type may need to be widened rather than split. Or widened before
12160 // it can be split.
12161 if (!isTypeLegal(VecEVT))
12162 return SDValue();
12163
12164 MVT VecVT = VecEVT.getSimpleVT();
12165 unsigned RVVOpcode = getRVVReductionOp(Opc);
12166
12167 if (VecVT.isFixedLengthVector()) {
12168 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
12169 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12170 }
12171
12172 SDValue VL = Op.getOperand(3);
12173 SDValue Mask = Op.getOperand(2);
12174 SDValue Res =
12175 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
12176 Vec, Mask, VL, DL, DAG, Subtarget);
12177 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
12178 Op->getFlags().hasNoNaNs())
12179 return Res;
12180
12181 // Propagate NaNs.
12182 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
12183 // Check if any of the elements in Vec is NaN.
12184 SDValue IsNaN = DAG.getNode(
12185 RISCVISD::SETCC_VL, DL, PredVT,
12186 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
12187 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
12188 // Check if the start value is NaN.
12189 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
12190 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
12191 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
12192 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
12193 MVT ResVT = Res.getSimpleValueType();
12194 return DAG.getSelect(
12195 DL, ResVT, NoNaNs, Res,
12196 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
12197}
12198
12199SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
12200 SelectionDAG &DAG) const {
12201 SDValue Vec = Op.getOperand(0);
12202 SDValue SubVec = Op.getOperand(1);
12203 MVT VecVT = Vec.getSimpleValueType();
12204 MVT SubVecVT = SubVec.getSimpleValueType();
12205
12206 SDLoc DL(Op);
12207 MVT XLenVT = Subtarget.getXLenVT();
12208 unsigned OrigIdx = Op.getConstantOperandVal(2);
12209 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12210
12211 if (OrigIdx == 0 && Vec.isUndef())
12212 return Op;
12213
12214 // We don't have the ability to slide mask vectors up indexed by their i1
12215 // elements; the smallest we can do is i8. Often we are able to bitcast to
12216 // equivalent i8 vectors. Note that when inserting a fixed-length vector
12217 // into a scalable one, we might not necessarily have enough scalable
12218 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
12219 if (SubVecVT.getVectorElementType() == MVT::i1) {
12220 if (VecVT.getVectorMinNumElements() >= 8 &&
12221 SubVecVT.getVectorMinNumElements() >= 8) {
12222 assert(OrigIdx % 8 == 0 && "Invalid index");
12223 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12224 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12225 "Unexpected mask vector lowering");
12226 OrigIdx /= 8;
12227 SubVecVT =
12228 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12229 SubVecVT.isScalableVector());
12230 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12231 VecVT.isScalableVector());
12232 Vec = DAG.getBitcast(VecVT, Vec);
12233 SubVec = DAG.getBitcast(SubVecVT, SubVec);
12234 } else {
12235 // We can't slide this mask vector up indexed by its i1 elements.
12236 // This poses a problem when we wish to insert a scalable vector which
12237 // can't be re-expressed as a larger type. Just choose the slow path and
12238 // extend to a larger type, then truncate back down.
12239 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12240 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12241 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12242 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
12243 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
12244 Op.getOperand(2));
12245 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
12246 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
12247 }
12248 }
12249
12250 // If the subvector vector is a fixed-length type and we don't know VLEN
12251 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12252 // don't know which register of a LMUL group contains the specific subvector
12253 // as we only know the minimum register size. Therefore we must slide the
12254 // vector group up the full amount.
12255 const auto VLen = Subtarget.getRealVLen();
12256 if (SubVecVT.isFixedLengthVector() && !VLen) {
12257 MVT ContainerVT = VecVT;
12258 if (VecVT.isFixedLengthVector()) {
12259 ContainerVT = getContainerForFixedLengthVector(VecVT);
12260 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12261 }
12262
12263 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
12264
12265 SDValue Mask =
12266 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12267 // Set the vector length to only the number of elements we care about. Note
12268 // that for slideup this includes the offset.
12269 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
12270 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
12271
12272 // Use tail agnostic policy if we're inserting over Vec's tail.
12274 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
12276
12277 // If we're inserting into the lowest elements, use a tail undisturbed
12278 // vmv.v.v.
12279 if (OrigIdx == 0) {
12280 SubVec =
12281 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
12282 } else {
12283 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12284 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
12285 SlideupAmt, Mask, VL, Policy);
12286 }
12287
12288 if (VecVT.isFixedLengthVector())
12289 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12290 return DAG.getBitcast(Op.getValueType(), SubVec);
12291 }
12292
12293 MVT ContainerVecVT = VecVT;
12294 if (VecVT.isFixedLengthVector()) {
12295 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
12296 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
12297 }
12298
12299 MVT ContainerSubVecVT = SubVecVT;
12300 if (SubVecVT.isFixedLengthVector()) {
12301 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12302 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
12303 }
12304
12305 unsigned SubRegIdx;
12306 ElementCount RemIdx;
12307 // insert_subvector scales the index by vscale if the subvector is scalable,
12308 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12309 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12310 if (SubVecVT.isFixedLengthVector()) {
12311 assert(VLen);
12312 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12313 auto Decompose =
12315 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12316 SubRegIdx = Decompose.first;
12317 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12318 (OrigIdx % Vscale));
12319 } else {
12320 auto Decompose =
12322 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
12323 SubRegIdx = Decompose.first;
12324 RemIdx = ElementCount::getScalable(Decompose.second);
12325 }
12326
12327 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
12329 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
12330 bool ExactlyVecRegSized =
12331 Subtarget.expandVScale(SubVecVT.getSizeInBits())
12332 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
12333
12334 // 1. If the Idx has been completely eliminated and this subvector's size is
12335 // a vector register or a multiple thereof, or the surrounding elements are
12336 // undef, then this is a subvector insert which naturally aligns to a vector
12337 // register. These can easily be handled using subregister manipulation.
12338 // 2. If the subvector isn't an exact multiple of a valid register group size,
12339 // then the insertion must preserve the undisturbed elements of the register.
12340 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
12341 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
12342 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
12343 // of that LMUL=1 type back into the larger vector (resolving to another
12344 // subregister operation). See below for how our VSLIDEUP works. We go via a
12345 // LMUL=1 type to avoid allocating a large register group to hold our
12346 // subvector.
12347 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
12348 if (SubVecVT.isFixedLengthVector()) {
12349 // We may get NoSubRegister if inserting at index 0 and the subvec
12350 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
12351 if (SubRegIdx == RISCV::NoSubRegister) {
12352 assert(OrigIdx == 0);
12353 return Op;
12354 }
12355
12356 // Use a insert_subvector that will resolve to an insert subreg.
12357 assert(VLen);
12358 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12359 SDValue Insert =
12360 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
12361 if (VecVT.isFixedLengthVector())
12362 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
12363 return Insert;
12364 }
12365 return Op;
12366 }
12367
12368 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
12369 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
12370 // (in our case undisturbed). This means we can set up a subvector insertion
12371 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
12372 // size of the subvector.
12373 MVT InterSubVT = ContainerVecVT;
12374 SDValue AlignedExtract = Vec;
12375 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
12376 if (SubVecVT.isFixedLengthVector()) {
12377 assert(VLen);
12378 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
12379 }
12380 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
12381 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
12382 // Extract a subvector equal to the nearest full vector register type. This
12383 // should resolve to a EXTRACT_SUBREG instruction.
12384 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
12385 }
12386
12387 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
12388
12389 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
12390
12391 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
12392 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
12393
12394 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
12396 if (Subtarget.expandVScale(EndIndex) ==
12397 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
12399
12400 // If we're inserting into the lowest elements, use a tail undisturbed
12401 // vmv.v.v.
12402 if (RemIdx.isZero()) {
12403 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
12404 SubVec, VL);
12405 } else {
12406 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12407
12408 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
12409 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
12410
12411 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
12412 SlideupAmt, Mask, VL, Policy);
12413 }
12414
12415 // If required, insert this subvector back into the correct vector register.
12416 // This should resolve to an INSERT_SUBREG instruction.
12417 if (ContainerVecVT.bitsGT(InterSubVT))
12418 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
12419
12420 if (VecVT.isFixedLengthVector())
12421 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12422
12423 // We might have bitcast from a mask type: cast back to the original type if
12424 // required.
12425 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
12426}
12427
// Lower ISD::EXTRACT_SUBVECTOR.  When the extract index lines up with a
// vector-register boundary this resolves to subregister manipulation;
// otherwise the source is slid down so the wanted elements start at index 0
// and the result is taken as a prefix.
// NOTE(review): this listing appears to be a scrape with some original lines
// dropped (e.g. 12535/12542, presumably the calls to
// RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs that produce
// `Decompose`) -- confirm against upstream before editing this block.
12428SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
12429                                                     SelectionDAG &DAG) const {
12430  SDValue Vec = Op.getOperand(0);
12431  MVT SubVecVT = Op.getSimpleValueType();
12432  MVT VecVT = Vec.getSimpleValueType();
12433
12434  SDLoc DL(Op);
12435  MVT XLenVT = Subtarget.getXLenVT();
12436  unsigned OrigIdx = Op.getConstantOperandVal(1);
12437  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12438
12439  // With an index of 0 this is a cast-like subvector, which can be performed
12440  // with subregister operations.
12441  if (OrigIdx == 0)
12442    return Op;
12443
12444  // We don't have the ability to slide mask vectors down indexed by their i1
12445  // elements; the smallest we can do is i8. Often we are able to bitcast to
12446  // equivalent i8 vectors. Note that when extracting a fixed-length vector
12447  // from a scalable one, we might not necessarily have enough scalable
12448  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
12449  if (SubVecVT.getVectorElementType() == MVT::i1) {
12450    if (VecVT.getVectorMinNumElements() >= 8 &&
12451        SubVecVT.getVectorMinNumElements() >= 8) {
12452      assert(OrigIdx % 8 == 0 && "Invalid index");
12453      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12454             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12455             "Unexpected mask vector lowering");
      // Reinterpret the i1 vectors as i8 vectors 1/8th the length; the index
      // scales down by 8 as well.
12456      OrigIdx /= 8;
12457      SubVecVT =
12458          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12459                           SubVecVT.isScalableVector());
12460      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12461                               VecVT.isScalableVector());
12462      Vec = DAG.getBitcast(VecVT, Vec);
12463    } else {
12464      // We can't slide this mask vector down, indexed by its i1 elements.
12465      // This poses a problem when we wish to extract a scalable vector which
12466      // can't be re-expressed as a larger type. Just choose the slow path and
12467      // extend to a larger type, then truncate back down.
12468      // TODO: We could probably improve this when extracting certain fixed
12469      // from fixed, where we can extract as i8 and shift the correct element
12470      // right to reach the desired subvector?
12471      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12472      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12473      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12474      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
12475                        Op.getOperand(1));
      // Truncate back to i1 by comparing the extracted i8 lanes against zero.
12476      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
12477      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
12478    }
12479  }
12480
12481  const auto VLen = Subtarget.getRealVLen();
12482
12483  // If the subvector vector is a fixed-length type and we don't know VLEN
12484  // exactly, we cannot use subregister manipulation to simplify the codegen; we
12485  // don't know which register of a LMUL group contains the specific subvector
12486  // as we only know the minimum register size. Therefore we must slide the
12487  // vector group down the full amount.
12488  if (SubVecVT.isFixedLengthVector() && !VLen) {
12489    MVT ContainerVT = VecVT;
12490    if (VecVT.isFixedLengthVector()) {
12491      ContainerVT = getContainerForFixedLengthVector(VecVT);
12492      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12493    }
12494
12495    // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
12496    unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12497    if (auto ShrunkVT =
12498            getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12499      ContainerVT = *ShrunkVT;
12500      Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12501    }
12502
12503    SDValue Mask =
12504        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12505    // Set the vector length to only the number of elements we care about. This
12506    // avoids sliding down elements we're going to discard straight away.
12507    SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12508    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12509    SDValue Slidedown =
12510        getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12511                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12512    // Now we can use a cast-like subvector extract to get the result.
12513    Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12514    return DAG.getBitcast(Op.getValueType(), Slidedown);
12515  }
12516
12517  if (VecVT.isFixedLengthVector()) {
12518    VecVT = getContainerForFixedLengthVector(VecVT);
12519    Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12520  }
12521
12522  MVT ContainerSubVecVT = SubVecVT;
12523  if (SubVecVT.isFixedLengthVector())
12524    ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12525
12526  unsigned SubRegIdx;
12527  ElementCount RemIdx;
12528  // extract_subvector scales the index by vscale if the subvector is scalable,
12529  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12530  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12531  if (SubVecVT.isFixedLengthVector()) {
12532    assert(VLen);
12533    unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
    // NOTE(review): the callee producing `Decompose` (original line 12535) is
    // missing from this scrape -- presumably
    // decomposeSubvectorInsertExtractToSubRegs; confirm against upstream.
12534    auto Decompose =
12536            VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12537    SubRegIdx = Decompose.first;
12538    RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12539                                    (OrigIdx % Vscale));
12540  } else {
    // NOTE(review): same missing callee here (original line 12542).
12541    auto Decompose =
12543            VecVT, ContainerSubVecVT, OrigIdx, TRI);
12544    SubRegIdx = Decompose.first;
12545    RemIdx = ElementCount::getScalable(Decompose.second);
12546  }
12547
12548  // If the Idx has been completely eliminated then this is a subvector extract
12549  // which naturally aligns to a vector register. These can easily be handled
12550  // using subregister manipulation. We use an extract_subvector that will
12551  // resolve to an extract subreg.
12552  if (RemIdx.isZero()) {
12553    if (SubVecVT.isFixedLengthVector()) {
12554      assert(VLen);
12555      unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12556      Vec =
12557          DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12558      return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12559    }
12560    return Op;
12561  }
12562
12563  // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12564  // was > M1 then the index would need to be a multiple of VLMAX, and so would
12565  // divide exactly.
12566  assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12567         getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12568
12569  // If the vector type is an LMUL-group type, extract a subvector equal to the
12570  // nearest full vector register type.
12571  MVT InterSubVT = VecVT;
12572  if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12573    // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12574    // we should have successfully decomposed the extract into a subregister.
12575    // We use an extract_subvector that will resolve to a subreg extract.
12576    assert(SubRegIdx != RISCV::NoSubRegister);
12577    (void)SubRegIdx;
12578    unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12579    if (SubVecVT.isFixedLengthVector()) {
12580      assert(VLen);
12581      Idx /= *VLen / RISCV::RVVBitsPerBlock;
12582    }
12583    InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12584    Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12585  }
12586
12587  // Slide this vector register down by the desired number of elements in order
12588  // to place the desired subvector starting at element 0.
12589  SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12590  auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12591  if (SubVecVT.isFixedLengthVector())
12592    VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12593  SDValue Slidedown =
12594      getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12595                    Vec, SlidedownAmt, Mask, VL);
12596
12597  // Now the vector is in the right position, extract our final subvector. This
12598  // should resolve to a COPY.
12599  Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12600
12601  // We might have bitcast from a mask type: cast back to the original type if
12602  // required.
12603  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12604}
12605
12606// Widen a vector's operands to i8, then truncate its results back to the
12607// original type, typically i1. All operand and result types must be the same.
// NOTE(review): this listing looks scraped; original line 12608 (the start of
// the function signature), 12612 (presumably the `WideOps` declaration) and
// 12621 (presumably the `VTs` declaration) are missing here -- confirm
// against upstream before editing this block.
12609                                   SelectionDAG &DAG) {
12610  MVT VT = N.getSimpleValueType();
  // Perform the whole computation in an equivalently-shaped i8 vector.
12611  MVT WideVT = VT.changeVectorElementType(MVT::i8);
12613  for (SDValue Op : N->ops()) {
12614    assert(Op.getSimpleValueType() == VT &&
12615           "Operands and result must be same type");
    // Zero-extend each narrow operand up to i8.
12616    WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12617  }
12618
12619  unsigned NumVals = N->getNumValues();
12620
12622      NumVals,
12623      N.getValueType().changeVectorElementType(*DAG.getContext(), MVT::i8)));
  // Re-issue the original opcode over the widened operands/result types.
12624  SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12625  SmallVector<SDValue, 4> TruncVals;
12626  for (unsigned I = 0; I < NumVals; I++) {
    // Truncate each widened result back to the original element type via a
    // compare-against-zero (setcc produces the narrow, typically i1, type).
12627    TruncVals.push_back(
12628        DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12629                     DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12630  }
12631
  // Multi-result nodes are merged back together; single results are returned
  // directly.
12632  if (TruncVals.size() > 1)
12633    return DAG.getMergeValues(TruncVals, DL);
12634  return TruncVals.front();
12635}
12636
// Lower ISD::VECTOR_DEINTERLEAVE for deinterleave factors up to 8.  Strategies
// in order: widen i1 to i8; legalize fixed-length operands to scalable
// containers; split anything that would exceed LMUL=8; use XRivosVizip
// unzip instructions when available (Factor == 2); vnsrl/vcompress tricks for
// Factor == 2; otherwise spill via a unit-stride store and reload with a
// segmented (vlsegN) load.
// NOTE(review): several original lines are missing from this scrape (e.g.
// 12652, 12659-12660, 12680/12682, 12729-12730, 12781, 12794, 12809-12810,
// 12817, 12820) -- confirm against upstream before editing this block.
12637SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12638                                                      SelectionDAG &DAG) const {
12639  SDLoc DL(Op);
12640  MVT VecVT = Op.getSimpleValueType();
12641
12642  const unsigned Factor = Op->getNumValues();
12643  assert(Factor <= 8);
12644
12645  // 1 bit element vectors need to be widened to e8
12646  if (VecVT.getVectorElementType() == MVT::i1)
12647    return widenVectorOpsToi8(Op, DL, DAG);
12648
12649  // Convert to scalable vectors first.
12650  if (VecVT.isFixedLengthVector()) {
12651    MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12653    for (unsigned i = 0U; i < Factor; ++i)
12654      Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12655                                       Subtarget);
12656
12657    SmallVector<EVT, 8> VTs(Factor, ContainerVT);
    // Recreate the deinterleave on the container types and convert each
    // result back to the fixed-length type.
12658    SDValue NewDeinterleave =
12660
12661    SmallVector<SDValue, 8> Res(Factor);
12662    for (unsigned i = 0U; i < Factor; ++i)
12663      Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12664                                         DAG, Subtarget);
12665    return DAG.getMergeValues(Res, DL);
12666  }
12667
12668  // If concatenating would exceed LMUL=8, we need to split.
12669  if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12670      (8 * RISCV::RVVBitsPerBlock)) {
12671    SmallVector<SDValue, 8> Ops(Factor * 2);
12672    for (unsigned i = 0; i != Factor; ++i) {
12673      auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12674      Ops[i * 2] = OpLo;
12675      Ops[i * 2 + 1] = OpHi;
12676    }
12677
12678    SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12679
    // Deinterleave each half independently, then concatenate the matching
    // results back together.
12681                             ArrayRef(Ops).slice(0, Factor));
12683                             ArrayRef(Ops).slice(Factor, Factor));
12684
12685    SmallVector<SDValue, 8> Res(Factor);
12686    for (unsigned i = 0; i != Factor; ++i)
12687      Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12688                           Hi.getValue(i));
12689
12690    return DAG.getMergeValues(Res, DL);
12691  }
12692
12693  if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12694    MVT VT = Op->getSimpleValueType(0);
12695    SDValue V1 = Op->getOperand(0);
12696    SDValue V2 = Op->getOperand(1);
12697
12698    // For fractional LMUL, check if we can use a higher LMUL
12699    // instruction to avoid a vslidedown.
12700    if (SDValue Src = foldConcatVector(V1, V2);
12701        Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12702      EVT NewVT = VT.getDoubleNumVectorElementsVT();
12703      Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12704      // Freeze the source so we can increase its use count.
12705      Src = DAG.getFreeze(Src);
12706      SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12707                               DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12708      SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12709                              DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12710      Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12711      Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12712      return DAG.getMergeValues({Even, Odd}, DL);
12713    }
12714
12715    // Freeze the sources so we can increase their use count.
12716    V1 = DAG.getFreeze(V1);
12717    V2 = DAG.getFreeze(V2);
12718    SDValue Even =
12719        lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12720    SDValue Odd =
12721        lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12722    return DAG.getMergeValues({Even, Odd}, DL);
12723  }
12724
12725  SmallVector<SDValue, 8> Ops(Op->op_values());
12726
12727  // Concatenate the vectors as one vector to deinterleave
  // Non-power-of-two factors are padded with undef operands up to the next
  // power of two.
12728  MVT ConcatVT =
12731                                 PowerOf2Ceil(Factor)));
12732  if (Ops.size() < PowerOf2Ceil(Factor))
12733    Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12734  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12735
12736  if (Factor == 2) {
12737    // We can deinterleave through vnsrl.wi if the element type is smaller than
12738    // ELEN
12739    if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12740      SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12741      SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12742      return DAG.getMergeValues({Even, Odd}, DL);
12743    }
12744
12745    // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12746    // possibly mask vector, then extract the required subvector. Doing this
12747    // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12748    // creation to be rematerialized during register allocation to reduce
12749    // register pressure if needed.
12750
12751    MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12752
12753    SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12754    EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12755    SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12756
12757    SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12758    OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12759    SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12760
12761    // vcompress the even and odd elements into two separate vectors
12762    SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12763                                   EvenMask, DAG.getUNDEF(ConcatVT));
12764    SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12765                                  OddMask, DAG.getUNDEF(ConcatVT));
12766
12767    // Extract the result half of the gather for even and odd
12768    SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12769    SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12770
12771    return DAG.getMergeValues({Even, Odd}, DL);
12772  }
12773
12774  // Store with unit-stride store and load it back with segmented load.
12775  MVT XLenVT = Subtarget.getXLenVT();
12776  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12777  SDValue Passthru = DAG.getUNDEF(ConcatVT);
12778
12779  // Allocate a stack slot.
12780  Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12782      DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12783  auto &MF = DAG.getMachineFunction();
12784  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12785  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12786
  // Unit-stride store of the concatenated value to the stack slot.
12787  SDValue StoreOps[] = {DAG.getEntryNode(),
12788                        DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12789                        Concat, StackPtr, VL};
12790
12791  SDValue Chain = DAG.getMemIntrinsicNode(
12792      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12793      ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12795
  // Reload with the factor-appropriate masked segment load (vlsegN).
12796  static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12797      Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12798      Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12799      Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12800      Intrinsic::riscv_vlseg8_mask};
12801
12802  SDValue LoadOps[] = {
12803      Chain,
12804      DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12805      Passthru,
12806      StackPtr,
12807      Mask,
12808      VL,
12811      DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12812
12813  unsigned Sz =
12814      Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12815  EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12816
12818      ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12819      LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12821
  // Pull each deinterleaved field out of the loaded tuple.
12822  SmallVector<SDValue, 8> Res(Factor);
12823
12824  for (unsigned i = 0U; i < Factor; ++i)
12825    Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12826                         DAG.getTargetConstant(i, DL, MVT::i32));
12827
12828  return DAG.getMergeValues(Res, DL);
12829}
12830
// Lower ISD::VECTOR_INTERLEAVE for interleave factors up to 8.  Strategies in
// order: widen i1 to i8; legalize fixed-length operands to scalable
// containers; split anything that would exceed LMUL=8; spill through a
// segmented (vssegN) store for Factor != 2; use XRivosVizip zip instructions
// when available; otherwise widen-and-interleave or a vrgatherei16 gather.
// NOTE(review): several original lines are missing from this scrape (e.g.
// 12846, 12878, 12898, 12903, 12933, 12939, 12974-12975, 12992) -- confirm
// against upstream before editing this block.
12831SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12832                                                    SelectionDAG &DAG) const {
12833  SDLoc DL(Op);
12834  MVT VecVT = Op.getSimpleValueType();
12835
12836  const unsigned Factor = Op.getNumOperands();
12837  assert(Factor <= 8);
12838
12839  // i1 vectors need to be widened to i8
12840  if (VecVT.getVectorElementType() == MVT::i1)
12841    return widenVectorOpsToi8(Op, DL, DAG);
12842
12843  // Convert to scalable vectors first.
12844  if (VecVT.isFixedLengthVector()) {
12845    MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12847    for (unsigned i = 0U; i < Factor; ++i)
12848      Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12849                                       Subtarget);
12850
12851    SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12852    SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12853
12854    SmallVector<SDValue, 8> Res(Factor);
12855    for (unsigned i = 0U; i < Factor; ++i)
12856      Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12857                                         Subtarget);
12858    return DAG.getMergeValues(Res, DL);
12859  }
12860
12861  MVT XLenVT = Subtarget.getXLenVT();
12862  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12863
12864  // If the VT is larger than LMUL=8, we need to split and reassemble.
12865  if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12866      (8 * RISCV::RVVBitsPerBlock)) {
12867    SmallVector<SDValue, 8> Ops(Factor * 2);
12868    for (unsigned i = 0; i != Factor; ++i) {
12869      auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12870      Ops[i] = OpLo;
12871      Ops[i + Factor] = OpHi;
12872    }
12873
12874    SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12875
    // Interleave the low halves and the high halves independently.
12876    SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12877                                 ArrayRef(Ops).take_front(Factor)),
12879                                 ArrayRef(Ops).drop_front(Factor))};
12880
    // Result i is the concatenation of interleaved pieces 2*i and 2*i+1,
    // indexed across the two split interleaves.
12881    SmallVector<SDValue, 8> Concats(Factor);
12882    for (unsigned i = 0; i != Factor; ++i) {
12883      unsigned IdxLo = 2 * i;
12884      unsigned IdxHi = 2 * i + 1;
12885      Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12886                               Res[IdxLo / Factor].getValue(IdxLo % Factor),
12887                               Res[IdxHi / Factor].getValue(IdxHi % Factor));
12888    }
12889
12890    return DAG.getMergeValues(Concats, DL);
12891  }
12892
12893  SDValue Interleaved;
12894
12895  // Spill to the stack using a segment store for simplicity.
12896  if (Factor != 2) {
12897    EVT MemVT =
12899                        VecVT.getVectorElementCount() * Factor);
12900
12901    // Allocate a stack slot.
12902    Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12904        DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12905    EVT PtrVT = StackPtr.getValueType();
12906    auto &MF = DAG.getMachineFunction();
12907    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12908    auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12909
12910    static const Intrinsic::ID IntrIds[] = {
12911        Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12912        Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12913        Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12914        Intrinsic::riscv_vsseg8_mask,
12915    };
12916
12917    unsigned Sz =
12918        Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12919    EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12920
    // Build the tuple value to feed the segment store, one field per operand.
12921    SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12922    for (unsigned i = 0; i < Factor; i++)
12923      StoredVal =
12924          DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12925                      Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12926
12927    SDValue Ops[] = {DAG.getEntryNode(),
12928                     DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12929                     StoredVal,
12930                     StackPtr,
12931                     Mask,
12932                     VL,
12934                                           DL, XLenVT)};
12935
12936    SDValue Chain = DAG.getMemIntrinsicNode(
12937        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12938        VecVT.getVectorElementType(), PtrInfo, Alignment,
12940
    // The vssegN store wrote the data interleaved in memory; read it back as
    // Factor contiguous unit-stride loads.
12941    SmallVector<SDValue, 8> Loads(Factor);
12942
12943    SDValue Increment = DAG.getTypeSize(DL, PtrVT, VecVT.getStoreSize());
12944    for (unsigned i = 0; i != Factor; ++i) {
12945      if (i != 0)
12946        StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12947
12948      Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12949    }
12950
12951    return DAG.getMergeValues(Loads, DL);
12952  }
12953
12954  // Use ri.vzip2{a,b} if available
12955  // TODO: Figure out the best lowering for the spread variants
12956  if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12957      !Op.getOperand(1).isUndef()) {
12958    // Freeze the sources so we can increase their use count.
12959    SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12960    SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12961    SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12962    SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12963    return DAG.getMergeValues({Lo, Hi}, DL);
12964  }
12965
12966  // If the element type is smaller than ELEN, then we can interleave with
12967  // vwaddu.vv and vwmaccu.vx
12968  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12969    Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12970                                        DAG, Subtarget);
12971  } else {
12972    // Otherwise, fallback to using vrgathere16.vv
12973    MVT ConcatVT =
12976    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12977                                 Op.getOperand(0), Op.getOperand(1));
12978
12979    MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12980
12981    // 0 1 2 3 4 5 6 7 ...
12982    SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12983
12984    // 1 1 1 1 1 1 1 1 ...
12985    SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12986
12987    // 1 0 1 0 1 0 1 0 ...
12988    SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12989    OddMask = DAG.getSetCC(
12990        DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12991        DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12993
12994    SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12995
12996    // Build up the index vector for interleaving the concatenated vector
12997    // 0 0 1 1 2 2 3 3 ...
12998    SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12999    // 0 n 1 n+1 2 n+2 3 n+3 ...
13000    Idx =
13001        DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
13002
13003    // Then perform the interleave
13004    // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
13005    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
13006    Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
13007                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
13008  }
13009
13010  // Extract the two halves from the interleaved result
13011  SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
13012  SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
13013                                       VecVT.getVectorMinNumElements());
13014
13015  return DAG.getMergeValues({Lo, Hi}, DL);
13016}
13017
13018// Lower step_vector to the vid instruction. Any non-identity step value must
13019// be accounted for by manual expansion.
13020SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
13021 SelectionDAG &DAG) const {
13022 SDLoc DL(Op);
13023 MVT VT = Op.getSimpleValueType();
13024 assert(VT.isScalableVector() && "Expected scalable vector");
13025 MVT XLenVT = Subtarget.getXLenVT();
13026 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
13027 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
13028 uint64_t StepValImm = Op.getConstantOperandVal(0);
13029 if (StepValImm != 1) {
13030 if (isPowerOf2_64(StepValImm)) {
13031 SDValue StepVal =
13032 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13033 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
13034 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
13035 } else {
13036 SDValue StepVal = lowerScalarSplat(
13037 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
13038 VL, VT, DL, DAG, Subtarget);
13039 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
13040 }
13041 }
13042 return StepVec;
13043}
13044
13045// Implement vector_reverse using vrgather.vv with indices determined by
13046// subtracting the id of each element from (VLMAX-1). This will convert
13047// the indices like so:
13048// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
13049// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
13050SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
13051                                                 SelectionDAG &DAG) const {
13052  SDLoc DL(Op);
13053  MVT VecVT = Op.getSimpleValueType();
  // i1 vectors: widen to i8, reverse there, then compare back to zero to
  // recover the mask type.
13054  if (VecVT.getVectorElementType() == MVT::i1) {
13055    MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
13056    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
13057    SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
13058    return DAG.getSetCC(DL, VecVT, Op2,
13059                        DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
13060  }
13061
  // Work on the scalable container type for fixed-length vectors.
13062  MVT ContainerVT = VecVT;
13063  SDValue Vec = Op.getOperand(0);
13064  if (VecVT.isFixedLengthVector()) {
13065    ContainerVT = getContainerForFixedLengthVector(VecVT);
13066    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13067  }
13068
13069  MVT XLenVT = Subtarget.getXLenVT();
13070  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
13071
13072  // On some uarchs vrgather.vv will read from every input register for each
13073  // output register, regardless of the indices. However to reverse a vector
13074  // each output register only needs to read from one register. So decompose it
13075  // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
13076  // O(LMUL^2).
13077  //
13078  // vsetvli a1, zero, e64, m4, ta, ma
13079  // vrgatherei16.vv v12, v8, v16
13080  // ->
13081  // vsetvli a1, zero, e64, m1, ta, ma
13082  // vrgather.vv v15, v8, v16
13083  // vrgather.vv v14, v9, v16
13084  // vrgather.vv v13, v10, v16
13085  // vrgather.vv v12, v11, v16
13086  if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
13087      ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
    // Reverse each half recursively, then swap the halves.
13088    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
13089    Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
13090    Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
13091    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
13092
13093    // Fixed length vectors might not fit exactly into their container, and so
13094    // leave a gap in the front of the vector after being reversed. Slide this
13095    // away.
13096    //
13097    // x x x x 3 2 1 0 <- v4i16 @ vlen=128
13098    // 0 1 2 3 x x x x <- reverse
13099    // x x x x 0 1 2 3 <- vslidedown.vx
13100    if (VecVT.isFixedLengthVector()) {
      // Slide by (container element count) - (actual element count).
13101      SDValue Offset = DAG.getNode(
13102          ISD::SUB, DL, XLenVT,
13103          DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
13104          DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
13105      Concat =
13106          getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13107                        DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
13108      Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
13109    }
13110    return Concat;
13111  }
13112
13113  unsigned EltSize = ContainerVT.getScalarSizeInBits();
13114  unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
13115  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13116  unsigned MaxVLMAX =
13117      VecVT.isFixedLengthVector()
13118          ? VecVT.getVectorNumElements()
13119          : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13120
13121  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13122  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
13123
13124  // If this is SEW=8 and VLMAX is potentially more than 256, we need
13125  // to use vrgatherei16.vv.
13126  if (MaxVLMAX > 256 && EltSize == 8) {
13127    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
13128    // Reverse each half, then reassemble them in reverse order.
13129    // NOTE: It's also possible that after splitting that VLMAX no longer
13130    // requires vrgatherei16.vv.
13131    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13132      auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
13133      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
13134      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13135      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13136      // Reassemble the low and high pieces reversed.
13137      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);
13138    }
13139
13140    // Just promote the int type to i16 which will double the LMUL.
13141    IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
13142    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13143  }
13144
13145  // At LMUL > 1, do the index computation in 16 bits to reduce register
13146  // pressure.
13147  if (IntVT.getScalarType().bitsGT(MVT::i16) &&
13148      IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
13149    assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
13150    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13151    IntVT = IntVT.changeVectorElementType(MVT::i16);
13152  }
13153
13154  // Calculate VLMAX-1 for the desired SEW.
13155  SDValue VLMinus1 = DAG.getNode(
13156      ISD::SUB, DL, XLenVT,
13157      DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
13158      DAG.getConstant(1, DL, XLenVT));
13159
13160  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
13161  bool IsRV32E64 =
13162      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
13163  SDValue SplatVL;
13164  if (!IsRV32E64)
13165    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
13166  else
13167    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
13168                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
13169
  // Indices = (VLMAX-1) - vid, i.e. the reversed permutation.
13170  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
13171  SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
13172                                DAG.getUNDEF(IntVT), Mask, VL);
13173
13174  SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
13175                               DAG.getUNDEF(ContainerVT), Mask, VL);
13176  if (VecVT.isFixedLengthVector())
13177    Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
13178  return Gather;
13179}
13180
13181SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
13182 SelectionDAG &DAG) const {
13183 SDLoc DL(Op);
13184 SDValue V1 = Op.getOperand(0);
13185 SDValue V2 = Op.getOperand(1);
13186 SDValue Offset = Op.getOperand(2);
13187 MVT XLenVT = Subtarget.getXLenVT();
13188 MVT VecVT = Op.getSimpleValueType();
13189
13190 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
13191
13192 SDValue DownOffset, UpOffset;
13193 if (Op.getOpcode() == ISD::VECTOR_SPLICE_LEFT) {
13194 // The operand is a TargetConstant, we need to rebuild it as a regular
13195 // constant.
13196 DownOffset = Offset;
13197 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, Offset);
13198 } else {
13199 // The operand is a TargetConstant, we need to rebuild it as a regular
13200 // constant rather than negating the original operand.
13201 UpOffset = Offset;
13202 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, Offset);
13203 }
13204
13205 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
13206
13207 SDValue SlideDown = getVSlidedown(
13208 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
13209 Subtarget.hasVLDependentLatency() ? UpOffset
13210 : DAG.getRegister(RISCV::X0, XLenVT));
13211 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
13212 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
13214}
13215
13216SDValue
13217RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
13218 SelectionDAG &DAG) const {
13219 SDLoc DL(Op);
13220 auto *Load = cast<LoadSDNode>(Op);
13221
13223 Load->getMemoryVT(),
13224 *Load->getMemOperand()) &&
13225 "Expecting a correctly-aligned load");
13226
13227 MVT VT = Op.getSimpleValueType();
13228 MVT XLenVT = Subtarget.getXLenVT();
13229 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13230
13231 // If we know the exact VLEN and our fixed length vector completely fills
13232 // the container, use a whole register load instead.
13233 const auto [MinVLMAX, MaxVLMAX] =
13234 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
13235 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
13236 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
13237 MachineMemOperand *MMO = Load->getMemOperand();
13238 SDValue NewLoad =
13239 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
13240 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
13241 MMO->getAAInfo(), MMO->getRanges());
13242 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
13243 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
13244 }
13245
13246 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
13247
13248 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
13249 SDValue IntID = DAG.getTargetConstant(
13250 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
13251 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
13252 if (!IsMaskOp)
13253 Ops.push_back(DAG.getUNDEF(ContainerVT));
13254 Ops.push_back(Load->getBasePtr());
13255 Ops.push_back(VL);
13256 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13257 SDValue NewLoad =
13259 Load->getMemoryVT(), Load->getMemOperand());
13260
13261 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
13262 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
13263}
13264
13265SDValue
13266RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
13267 SelectionDAG &DAG) const {
13268 SDLoc DL(Op);
13269 auto *Store = cast<StoreSDNode>(Op);
13270
13272 Store->getMemoryVT(),
13273 *Store->getMemOperand()) &&
13274 "Expecting a correctly-aligned store");
13275
13276 SDValue StoreVal = Store->getValue();
13277 MVT VT = StoreVal.getSimpleValueType();
13278 MVT XLenVT = Subtarget.getXLenVT();
13279
13280 // If the size less than a byte, we need to pad with zeros to make a byte.
13281 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
13282 VT = MVT::v8i1;
13283 StoreVal =
13284 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
13285 }
13286
13287 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13288
13289 SDValue NewValue =
13290 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13291
13292 // If we know the exact VLEN and our fixed length vector completely fills
13293 // the container, use a whole register store instead.
13294 const auto [MinVLMAX, MaxVLMAX] =
13295 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
13296 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
13297 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
13298 MachineMemOperand *MMO = Store->getMemOperand();
13299 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
13300 MMO->getPointerInfo(), MMO->getBaseAlign(),
13301 MMO->getFlags(), MMO->getAAInfo());
13302 }
13303
13304 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
13305
13306 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
13307 SDValue IntID = DAG.getTargetConstant(
13308 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
13309 return DAG.getMemIntrinsicNode(
13310 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
13311 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
13312 Store->getMemoryVT(), Store->getMemOperand());
13313}
13314
/// Lower ISD::MLOAD and ISD::VP_LOAD to the riscv_vle/riscv_vle_mask
/// intrinsics.  An expanding masked load is emitted as an unmasked vle of
/// the vcpop(mask) packed elements followed by a viota-indexed vrgather that
/// scatters them to their mask positions.
SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  // Collect the operands that differ between the two node kinds: VP loads
  // carry an explicit vector length (and no passthru); masked loads carry a
  // passthru and may be expanding.
  SDValue Mask, PassThru, VL;
  bool IsExpandingLoad = false;
  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
    Mask = VPLoad->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPLoad->getVectorLength();
  } else {
    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
    Mask = MLoad->getMask();
    PassThru = MLoad->getPassThru();
    IsExpandingLoad = MLoad->isExpandingLoad();
  }

  // An all-ones mask lets us use the unmasked form of the intrinsic.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  // VL is only set so far for VP loads; masked loads use the default VL.
  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  // An expanding load reads only vcpop(mask) contiguous elements from
  // memory; keep the original VL around for the expansion step below.
  SDValue ExpandingVL;
  if (!IsUnmasked && IsExpandingLoad) {
    ExpandingVL = VL;
    VL =
        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
  }

  // Expanding loads read contiguously, so they also use the unmasked vle.
  unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
                                                 : Intrinsic::riscv_vle_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IntID == Intrinsic::riscv_vle)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  if (IntID == Intrinsic::riscv_vle_mask)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (IntID == Intrinsic::riscv_vle_mask)
    Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);
  if (ExpandingVL) {
    // Scatter the packed elements to their final lanes.  viota gives each
    // lane the count of active lanes before it, i.e. its source index in the
    // packed load result.  vrgather indices must be integers even for FP
    // data.
    MVT IndexVT = ContainerVT;
    if (ContainerVT.isFloatingPoint())
      IndexVT = ContainerVT.changeVectorElementTypeToInteger();

    MVT IndexEltVT = IndexVT.getVectorElementType();
    bool UseVRGATHEREI16 = false;
    // If index vector is an i8 vector and the element count exceeds 256, we
    // should change the element type of index vector to i16 to avoid
    // overflow.
    if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
      // FIXME: We need to do vector splitting manually for LMUL=8 cases.
      assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
      UseVRGATHEREI16 = true;
    }

    SDValue Iota =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
                    DAG.getTargetConstant(Intrinsic::riscv_viota, DL, XLenVT),
                    DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
    Result =
        DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
                                    : RISCVISD::VRGATHER_VV_VL,
                    DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
  }

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}
13415
13416SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
13417 SDLoc DL(Op);
13418 MVT VT = Op->getSimpleValueType(0);
13419
13420 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
13421 EVT MemVT = VPLoadFF->getMemoryVT();
13422 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
13423 SDValue Chain = VPLoadFF->getChain();
13424 SDValue BasePtr = VPLoadFF->getBasePtr();
13425
13426 SDValue Mask = VPLoadFF->getMask();
13427 SDValue VL = VPLoadFF->getVectorLength();
13428
13429 MVT XLenVT = Subtarget.getXLenVT();
13430
13431 MVT ContainerVT = VT;
13432 if (VT.isFixedLengthVector()) {
13433 ContainerVT = getContainerForFixedLengthVector(VT);
13434 MVT MaskVT = getMaskTypeFor(ContainerVT);
13435 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13436 }
13437
13438 unsigned IntID = Intrinsic::riscv_vleff_mask;
13439 SDValue Ops[] = {
13440 Chain,
13441 DAG.getTargetConstant(IntID, DL, XLenVT),
13442 DAG.getUNDEF(ContainerVT),
13443 BasePtr,
13444 Mask,
13445 VL,
13447
13448 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
13449
13450 SDValue Result =
13451 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
13452 SDValue OutVL = Result.getValue(1);
13453 Chain = Result.getValue(2);
13454
13455 if (VT.isFixedLengthVector())
13456 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13457
13458 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
13459}
13460
13461SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
13462 SelectionDAG &DAG) const {
13463 SDLoc DL(Op);
13464
13465 const auto *MemSD = cast<MemSDNode>(Op);
13466 EVT MemVT = MemSD->getMemoryVT();
13467 MachineMemOperand *MMO = MemSD->getMemOperand();
13468 SDValue Chain = MemSD->getChain();
13469 SDValue BasePtr = MemSD->getBasePtr();
13470 SDValue Val, Mask, VL;
13471
13472 bool IsCompressingStore = false;
13473 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
13474 Val = VPStore->getValue();
13475 Mask = VPStore->getMask();
13476 VL = VPStore->getVectorLength();
13477 } else {
13478 const auto *MStore = cast<MaskedStoreSDNode>(Op);
13479 Val = MStore->getValue();
13480 Mask = MStore->getMask();
13481 IsCompressingStore = MStore->isCompressingStore();
13482 }
13483
13484 bool IsUnmasked =
13485 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
13486
13487 MVT VT = Val.getSimpleValueType();
13488 MVT XLenVT = Subtarget.getXLenVT();
13489
13490 MVT ContainerVT = VT;
13491 if (VT.isFixedLengthVector()) {
13492 ContainerVT = getContainerForFixedLengthVector(VT);
13493
13494 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13495 if (!IsUnmasked || IsCompressingStore) {
13496 MVT MaskVT = getMaskTypeFor(ContainerVT);
13497 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13498 }
13499 }
13500
13501 if (!VL)
13502 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13503
13504 if (IsCompressingStore) {
13505 Val = DAG.getNode(
13506 ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13507 DAG.getTargetConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13508 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13509 VL =
13510 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13511 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13512 }
13513
13514 unsigned IntID =
13515 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13516 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13517 Ops.push_back(Val);
13518 Ops.push_back(BasePtr);
13519 if (!IsUnmasked)
13520 Ops.push_back(Mask);
13521 Ops.push_back(VL);
13522
13524 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13525}
13526
13527SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13528 SelectionDAG &DAG) const {
13529 SDLoc DL(Op);
13530 SDValue Val = Op.getOperand(0);
13531 SDValue Mask = Op.getOperand(1);
13532 SDValue Passthru = Op.getOperand(2);
13533
13534 MVT VT = Val.getSimpleValueType();
13535 MVT XLenVT = Subtarget.getXLenVT();
13536 MVT ContainerVT = VT;
13537 if (VT.isFixedLengthVector()) {
13538 ContainerVT = getContainerForFixedLengthVector(VT);
13539 MVT MaskVT = getMaskTypeFor(ContainerVT);
13540 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13541 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13542 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13543 }
13544
13545 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13546 SDValue Res =
13547 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13548 DAG.getTargetConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13549 Passthru, Val, Mask, VL);
13550
13551 if (VT.isFixedLengthVector())
13552 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13553
13554 return Res;
13555}
13556
/// Lower vector STRICT_FSETCC/STRICT_FSETCCS, preserving FP-exception
/// semantics that the plain RVV compare instructions do not give directly.
SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  SDValue CC = Op.getOperand(3);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
  MVT VT = Op.getSimpleValueType();
  MVT InVT = Op1.getSimpleValueType();

  // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
  // condition code.
  if (Opc == ISD::STRICT_FSETCCS) {
    // Expand strict_fsetccs(x, oeq) to
    // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
    SDVTList VTList = Op->getVTList();
    if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
      // (x ole y) && (y ole x) is equivalent to oeq, and both halves signal
      // on any NaN as SETCCS requires.
      SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
      SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                 Op2, OLECCVal);
      SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
                                 Op1, OLECCVal);
      // Join the two output chains.
      SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                     Tmp1.getValue(1), Tmp2.getValue(1));
      // Tmp1 and Tmp2 might be the same node.
      if (Tmp1 != Tmp2)
        Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
      return DAG.getMergeValues({Tmp1, OutChain}, DL);
    }

    // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
    if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
      SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
      SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                Op2, OEQCCVal);
      SDValue Res = DAG.getNOT(DL, OEQ, VT);
      return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
    }
  }

  MVT ContainerInVT = InVT;
  if (InVT.isFixedLengthVector()) {
    ContainerInVT = getContainerForFixedLengthVector(InVT);
    Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
  }
  MVT MaskVT = getMaskTypeFor(ContainerInVT);

  auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);

  SDValue Res;
  if (Opc == ISD::STRICT_FSETCC &&
      (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
       CCVal == ISD::SETOLE)) {
    // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only
    // active when both input elements are ordered.
    SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
    // x == x (oeq) is true exactly when x is not NaN, i.e. ordered.
    SDValue OrderMask1 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    SDValue OrderMask2 = DAG.getNode(
        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
        {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
         True, VL});
    Mask =
        DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
    // Use Mask as the passthru operand to let the result be 0 if either of the
    // inputs is unordered.
    Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
                      DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, Mask, Mask, VL});
  } else {
    unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
                                                : RISCVISD::STRICT_FSETCCS_VL;
    Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
                      {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
  }

  if (VT.isFixedLengthVector()) {
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
13644
13645// Lower vector ABS to smax(X, sub(0, X)).
13646SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13647 SDLoc DL(Op);
13648 MVT VT = Op.getSimpleValueType();
13649 SDValue X = Op.getOperand(0);
13650
13651 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13652 "Unexpected type for ISD::ABS");
13653
13654 MVT ContainerVT = VT;
13655 if (VT.isFixedLengthVector()) {
13656 ContainerVT = getContainerForFixedLengthVector(VT);
13657 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13658 }
13659
13660 SDValue Mask, VL;
13661 if (Op->getOpcode() == ISD::VP_ABS) {
13662 Mask = Op->getOperand(1);
13663 if (VT.isFixedLengthVector())
13664 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13665 Subtarget);
13666 VL = Op->getOperand(2);
13667 } else
13668 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13669
13670 SDValue SplatZero = DAG.getNode(
13671 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13672 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13673 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13674 DAG.getUNDEF(ContainerVT), Mask, VL);
13675 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13676 DAG.getUNDEF(ContainerVT), Mask, VL);
13677
13678 if (VT.isFixedLengthVector())
13679 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13680 return Max;
13681}
13682
13683SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13684 SelectionDAG &DAG) const {
13685 const auto &TSInfo =
13686 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13687
13688 unsigned NewOpc = getRISCVVLOp(Op);
13689 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13690 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13691
13692 MVT VT = Op.getSimpleValueType();
13693 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13694
13695 // Create list of operands by converting existing ones to scalable types.
13697 for (const SDValue &V : Op->op_values()) {
13698 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13699
13700 // Pass through non-vector operands.
13701 if (!V.getValueType().isVector()) {
13702 Ops.push_back(V);
13703 continue;
13704 }
13705
13706 // "cast" fixed length vector to a scalable vector.
13707 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13708 "Only fixed length vectors are supported!");
13709 MVT VContainerVT = ContainerVT.changeVectorElementType(
13710 V.getSimpleValueType().getVectorElementType());
13711 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13712 }
13713
13714 SDLoc DL(Op);
13715 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13716 if (HasPassthruOp)
13717 Ops.push_back(DAG.getUNDEF(ContainerVT));
13718 if (HasMask)
13719 Ops.push_back(Mask);
13720 Ops.push_back(VL);
13721
13722 // StrictFP operations have two result values. Their lowered result should
13723 // have same result count.
13724 if (Op->isStrictFPOpcode()) {
13725 SDValue ScalableRes =
13726 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13727 Op->getFlags());
13728 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13729 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13730 }
13731
13732 SDValue ScalableRes =
13733 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13734 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13735}
13736
13737// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13738// * Operands of each node are assumed to be in the same order.
13739// * The EVL operand is promoted from i32 to i64 on RV64.
13740// * Fixed-length vectors are converted to their scalable-vector container
13741// types.
13742SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13743 const auto &TSInfo =
13744 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13745
13746 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13747 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13748
13749 SDLoc DL(Op);
13750 MVT VT = Op.getSimpleValueType();
13752
13753 MVT ContainerVT = VT;
13754 if (VT.isFixedLengthVector())
13755 ContainerVT = getContainerForFixedLengthVector(VT);
13756
13757 for (const auto &OpIdx : enumerate(Op->ops())) {
13758 SDValue V = OpIdx.value();
13759 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13760 // Add dummy passthru value before the mask. Or if there isn't a mask,
13761 // before EVL.
13762 if (HasPassthruOp) {
13763 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13764 if (MaskIdx) {
13765 if (*MaskIdx == OpIdx.index())
13766 Ops.push_back(DAG.getUNDEF(ContainerVT));
13767 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13768 OpIdx.index()) {
13769 if (Op.getOpcode() == ISD::VP_MERGE) {
13770 // For VP_MERGE, copy the false operand instead of an undef value.
13771 Ops.push_back(Ops.back());
13772 } else {
13773 assert(Op.getOpcode() == ISD::VP_SELECT);
13774 // For VP_SELECT, add an undef value.
13775 Ops.push_back(DAG.getUNDEF(ContainerVT));
13776 }
13777 }
13778 }
13779 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13780 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13781 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13783 Subtarget.getXLenVT()));
13784 // Pass through operands which aren't fixed-length vectors.
13785 if (!V.getValueType().isFixedLengthVector()) {
13786 Ops.push_back(V);
13787 continue;
13788 }
13789 // "cast" fixed length vector to a scalable vector.
13790 MVT OpVT = V.getSimpleValueType();
13791 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13792 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13793 "Only fixed length vectors are supported!");
13794 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13795 }
13796
13797 if (!VT.isFixedLengthVector())
13798 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13799
13800 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13801
13802 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13803}
13804
13805SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13806 SelectionDAG &DAG) const {
13807 SDLoc DL(Op);
13808 MVT VT = Op.getSimpleValueType();
13809
13810 SDValue Src = Op.getOperand(0);
13811 // NOTE: Mask is dropped.
13812 SDValue VL = Op.getOperand(2);
13813
13814 MVT ContainerVT = VT;
13815 if (VT.isFixedLengthVector()) {
13816 ContainerVT = getContainerForFixedLengthVector(VT);
13817 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13818 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13819 }
13820
13821 MVT XLenVT = Subtarget.getXLenVT();
13822 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13823 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13824 DAG.getUNDEF(ContainerVT), Zero, VL);
13825
13826 SDValue SplatValue = DAG.getSignedConstant(
13827 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13828 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13829 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13830
13831 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13832 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13833 if (!VT.isFixedLengthVector())
13834 return Result;
13835 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13836}
13837
13838SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13839 SelectionDAG &DAG) const {
13840 SDLoc DL(Op);
13841 MVT VT = Op.getSimpleValueType();
13842
13843 SDValue Op1 = Op.getOperand(0);
13844 SDValue Op2 = Op.getOperand(1);
13845 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13846 // NOTE: Mask is dropped.
13847 SDValue VL = Op.getOperand(4);
13848
13849 MVT ContainerVT = VT;
13850 if (VT.isFixedLengthVector()) {
13851 ContainerVT = getContainerForFixedLengthVector(VT);
13852 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13853 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13854 }
13855
13857 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13858
13859 switch (Condition) {
13860 default:
13861 break;
13862 // X != Y --> (X^Y)
13863 case ISD::SETNE:
13864 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13865 break;
13866 // X == Y --> ~(X^Y)
13867 case ISD::SETEQ: {
13868 SDValue Temp =
13869 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13870 Result =
13871 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13872 break;
13873 }
13874 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13875 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13876 case ISD::SETGT:
13877 case ISD::SETULT: {
13878 SDValue Temp =
13879 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13880 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13881 break;
13882 }
13883 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13884 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13885 case ISD::SETLT:
13886 case ISD::SETUGT: {
13887 SDValue Temp =
13888 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13889 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13890 break;
13891 }
13892 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13893 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13894 case ISD::SETGE:
13895 case ISD::SETULE: {
13896 SDValue Temp =
13897 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13898 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13899 break;
13900 }
13901 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13902 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13903 case ISD::SETLE:
13904 case ISD::SETUGE: {
13905 SDValue Temp =
13906 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13907 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13908 break;
13909 }
13910 }
13911
13912 if (!VT.isFixedLengthVector())
13913 return Result;
13914 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13915}
13916
13917// Lower Floating-Point/Integer Type-Convert VP SDNodes
13918SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13919 SelectionDAG &DAG) const {
13920 SDLoc DL(Op);
13921
13922 SDValue Src = Op.getOperand(0);
13923 SDValue Mask = Op.getOperand(1);
13924 SDValue VL = Op.getOperand(2);
13925 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13926
13927 MVT DstVT = Op.getSimpleValueType();
13928 MVT SrcVT = Src.getSimpleValueType();
13929 if (DstVT.isFixedLengthVector()) {
13930 DstVT = getContainerForFixedLengthVector(DstVT);
13931 SrcVT = getContainerForFixedLengthVector(SrcVT);
13932 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13933 MVT MaskVT = getMaskTypeFor(DstVT);
13934 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13935 }
13936
13937 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13938 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13939
13941 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13942 if (SrcVT.isInteger()) {
13943 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13944
13945 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13946 ? RISCVISD::VSEXT_VL
13947 : RISCVISD::VZEXT_VL;
13948
13949 // Do we need to do any pre-widening before converting?
13950 if (SrcEltSize == 1) {
13951 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13952 MVT XLenVT = Subtarget.getXLenVT();
13953 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13954 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13955 DAG.getUNDEF(IntVT), Zero, VL);
13956 SDValue One = DAG.getSignedConstant(
13957 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13958 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13959 DAG.getUNDEF(IntVT), One, VL);
13960 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13961 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13962 } else if (DstEltSize > (2 * SrcEltSize)) {
13963 // Widen before converting.
13964 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13965 DstVT.getVectorElementCount());
13966 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13967 }
13968
13969 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13970 } else {
13971 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13972 "Wrong input/output vector types");
13973
13974 // Convert f16 to f32 then convert f32 to i64.
13975 if (DstEltSize > (2 * SrcEltSize)) {
13976 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13977 MVT InterimFVT =
13978 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13979 Src =
13980 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13981 }
13982
13983 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13984 }
13985 } else { // Narrowing + Conversion
13986 if (SrcVT.isInteger()) {
13987 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13988 // First do a narrowing convert to an FP type half the size, then round
13989 // the FP type to a small FP type if needed.
13990
13991 MVT InterimFVT = DstVT;
13992 if (SrcEltSize > (2 * DstEltSize)) {
13993 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13994 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13995 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13996 }
13997
13998 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13999
14000 if (InterimFVT != DstVT) {
14001 Src = Result;
14002 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
14003 }
14004 } else {
14005 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
14006 "Wrong input/output vector types");
14007 // First do a narrowing conversion to an integer half the size, then
14008 // truncate if needed.
14009
14010 if (DstEltSize == 1) {
14011 // First convert to the same size integer, then convert to mask using
14012 // setcc.
14013 assert(SrcEltSize >= 16 && "Unexpected FP type!");
14014 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
14015 DstVT.getVectorElementCount());
14016 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
14017
14018 // Compare the integer result to 0. The integer should be 0 or 1/-1,
14019 // otherwise the conversion was undefined.
14020 MVT XLenVT = Subtarget.getXLenVT();
14021 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
14022 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
14023 DAG.getUNDEF(InterimIVT), SplatZero, VL);
14024 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
14025 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
14026 DAG.getUNDEF(DstVT), Mask, VL});
14027 } else {
14028 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
14029 DstVT.getVectorElementCount());
14030
14031 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
14032
14033 while (InterimIVT != DstVT) {
14034 SrcEltSize /= 2;
14035 Src = Result;
14036 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
14037 DstVT.getVectorElementCount());
14038 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
14039 Src, Mask, VL);
14040 }
14041 }
14042 }
14043 }
14044
14045 MVT VT = Op.getSimpleValueType();
14046 if (!VT.isFixedLengthVector())
14047 return Result;
14048 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14049}
14050
// Lower VP_MERGE whose result is a mask (i1) vector: i1 vectors cannot be
// vmerge'd directly, so the operands are promoted to i8 vectors of 0/1,
// merged there, and converted back to a mask with a setcc != 0.
14051 SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
14052                                               SelectionDAG &DAG) const {
14053   SDLoc DL(Op);
14054   MVT VT = Op.getSimpleValueType();
14055   MVT XLenVT = Subtarget.getXLenVT();
14056
14057   SDValue Mask = Op.getOperand(0);
14058   SDValue TrueVal = Op.getOperand(1);
14059   SDValue FalseVal = Op.getOperand(2);
14060   SDValue VL = Op.getOperand(3);
14061
14062   // Use default legalization if a vector of EVL type would be legal.
14063   EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
14065   if (isTypeLegal(EVLVecVT))
14066     return SDValue();
14067
14068   MVT ContainerVT = VT;
14069   if (VT.isFixedLengthVector()) {
14070     ContainerVT = getContainerForFixedLengthVector(VT);
14071     Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
14072     TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
14073     FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
14074   }
14075
14076   // Promote to a vector of i8.
14077   MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
14078
14079   // Promote TrueVal and FalseVal using VLMax.
14080   // FIXME: Is there a better way to do this?
14081   SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
14082   SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
14083                                  DAG.getUNDEF(PromotedVT),
14084                                  DAG.getConstant(1, DL, XLenVT), VLMax);
14085   SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
14086                                   DAG.getUNDEF(PromotedVT),
14087                                   DAG.getConstant(0, DL, XLenVT), VLMax);
14088   TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
14089                         SplatZero, DAG.getUNDEF(PromotedVT), VL);
14090   // Any element past VL uses FalseVal, so use VLMax
14091   FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
14092                          SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
14093
14094   // VP_MERGE the two promoted values.
14095   SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
14096                                 TrueVal, FalseVal, FalseVal, VL);
14097
14098   // Convert back to mask.
14099   SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
14100   SDValue Result = DAG.getNode(
14101       RISCVISD::SETCC_VL, DL, ContainerVT,
14102       {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
14103        DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
14104
14105   if (VT.isFixedLengthVector())
14106     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14107   return Result;
14108 }
14109
// Lower experimental VP_SPLICE: slide Op1 down by the splice offset, then
// slide Op2 up into the vacated tail positions. Mask (i1) vectors are first
// promoted to i8 0/1 vectors and converted back with a setcc at the end.
14110 SDValue
14111 RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
14112                                                SelectionDAG &DAG) const {
14113   using namespace SDPatternMatch;
14114
14115   SDLoc DL(Op);
14116
14117   SDValue Op1 = Op.getOperand(0);
14118   SDValue Op2 = Op.getOperand(1);
14119   SDValue Offset = Op.getOperand(2);
14120   SDValue Mask = Op.getOperand(3);
14121   SDValue EVL1 = Op.getOperand(4);
14122   SDValue EVL2 = Op.getOperand(5);
14123
14124   const MVT XLenVT = Subtarget.getXLenVT();
14125   MVT VT = Op.getSimpleValueType();
14126   MVT ContainerVT = VT;
14127   if (VT.isFixedLengthVector()) {
14128     ContainerVT = getContainerForFixedLengthVector(VT);
14129     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14130     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14131     MVT MaskVT = getMaskTypeFor(ContainerVT);
14132     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14133   }
14134
14135   bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
14136   if (IsMaskVector) {
14137     ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
14138
14139     // Expand input operands
14140     SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14141                                       DAG.getUNDEF(ContainerVT),
14142                                       DAG.getConstant(1, DL, XLenVT), EVL1);
14143     SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14144                                        DAG.getUNDEF(ContainerVT),
14145                                        DAG.getConstant(0, DL, XLenVT), EVL1);
14146     Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
14147                       SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
14148
14149     SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14150                                       DAG.getUNDEF(ContainerVT),
14151                                       DAG.getConstant(1, DL, XLenVT), EVL2);
14152     SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14153                                        DAG.getUNDEF(ContainerVT),
14154                                        DAG.getConstant(0, DL, XLenVT), EVL2);
14155     Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
14156                       SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
14157   }
14158
  // Returns the scalar that was inserted/splatted into lane 0 of Vec, if
  // that producer is visible in the DAG; otherwise an empty SDValue.
14159   auto getVectorFirstEle = [](SDValue Vec) {
14160     SDValue FirstEle;
14161     if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
14162       return FirstEle;
14163
14164     if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
14166       return Vec.getOperand(0);
14167
14168     return SDValue();
14169   };
14170
  // Special case: splicing a single element (EVL1 == 1) at offset 0 is just
  // a vslide1up of that element into Op2.
14171   if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
14172     if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
14173       MVT EltVT = ContainerVT.getVectorElementType();
      // f16 without vector f16 support, and bf16, are slid through an
      // equally-sized integer vector of the same bit pattern.
14175       if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
14176           EltVT == MVT::bf16) {
14177         EltVT = EltVT.changeTypeToInteger();
14178         ContainerVT = ContainerVT.changeVectorElementType(EltVT);
14179         Op2 = DAG.getBitcast(ContainerVT, Op2);
14180         FirstEle =
14181             DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
14182       }
14183       Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
14184                                                    : RISCVISD::VSLIDE1UP_VL,
14185                            DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
14186                            FirstEle, Mask, EVL2);
14187       Result = DAG.getBitcast(
14189           Result);
14190       return VT.isFixedLengthVector()
14191                  ? convertFromScalableVector(VT, Result, DAG, Subtarget)
14192                  : Result;
14193     }
14194
14195   int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
14196   SDValue DownOffset, UpOffset;
14197   if (ImmValue >= 0) {
14198     // The operand is a TargetConstant, we need to rebuild it as a regular
14199     // constant.
14200     DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
14201     UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
14202   } else {
14203     // The operand is a TargetConstant, we need to rebuild it as a regular
14204     // constant rather than negating the original operand.
14205     UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
14206     DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
14207   }
14208
  // vslidedown moves the spliced-out prefix off Op1; skip it when offset==0.
14209   if (ImmValue != 0)
14210     Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14211                         DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
14212                         Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
14213   SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
14214                                UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
14215
14216   if (IsMaskVector) {
14217     // Truncate Result back to a mask vector (Result has same EVL as Op2)
14218     Result = DAG.getNode(
14219         RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
14220         {Result, DAG.getConstant(0, DL, ContainerVT),
14221          DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
14222          Mask, EVL2});
14223   }
14224
14225   if (!VT.isFixedLengthVector())
14226     return Result;
14227   return convertFromScalableVector(VT, Result, DAG, Subtarget);
14228 }
14229
// Lower experimental VP_REVERSE: build the index vector [EVL-1, ..., 1, 0]
// with vid + subtract, then vrgather. Mask vectors are promoted to i8 first.
// The SEW=8 case with large VLMAX needs vrgatherei16 indices, and at LMUL=8
// is instead split in half, reversed per-half, swapped, and slid down.
14230 SDValue
14231 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
14232                                                 SelectionDAG &DAG) const {
14233   SDLoc DL(Op);
14234   MVT VT = Op.getSimpleValueType();
14235   MVT XLenVT = Subtarget.getXLenVT();
14236
14237   SDValue Op1 = Op.getOperand(0);
14238   SDValue Mask = Op.getOperand(1);
14239   SDValue EVL = Op.getOperand(2);
14240
14241   MVT ContainerVT = VT;
14242   if (VT.isFixedLengthVector()) {
14243     ContainerVT = getContainerForFixedLengthVector(VT);
14244     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14245     MVT MaskVT = getMaskTypeFor(ContainerVT);
14246     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14247   }
14248
14249   MVT GatherVT = ContainerVT;
14250   MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
14251   // Check if we are working with mask vectors
14252   bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
14253   if (IsMaskVector) {
14254     GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
14255
14256     // Expand input operand
14257     SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14258                                    DAG.getUNDEF(IndicesVT),
14259                                    DAG.getConstant(1, DL, XLenVT), EVL);
14260     SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14261                                     DAG.getUNDEF(IndicesVT),
14262                                     DAG.getConstant(0, DL, XLenVT), EVL);
14263     Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
14264                       SplatZero, DAG.getUNDEF(IndicesVT), EVL);
14265   }
14266
14267   unsigned EltSize = GatherVT.getScalarSizeInBits();
14268   unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
14269   unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
14270   unsigned MaxVLMAX =
14271       RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
14272
14273   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
14274   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
14275   // to use vrgatherei16.vv.
14276   // TODO: It's also possible to use vrgatherei16.vv for other types to
14277   // decrease register width for the index calculation.
14278   // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
14279   if (MaxVLMAX > 256 && EltSize == 8) {
14280     // If this is LMUL=8, we have to split before using vrgatherei16.vv.
14281     // Split the vector in half and reverse each half using a full register
14282     // reverse.
14283     // Swap the halves and concatenate them.
14284     // Slide the concatenated result by (VLMax - VL).
14285     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
14286       auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
14287       auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
14288
14289       SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
14290       SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
14291
14292       // Reassemble the low and high pieces reversed.
14293       // NOTE: this Result is unmasked (because we do not need masks for
14294       // shuffles). If in the future this has to change, we can use a SELECT_VL
14295       // between Result and UNDEF using the mask originally passed to VP_REVERSE
14296       SDValue Result =
14297           DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
14298
14299       // Slide off any elements from past EVL that were reversed into the low
14300       // elements.
14301       SDValue VLMax =
14302           DAG.getElementCount(DL, XLenVT, GatherVT.getVectorElementCount());
14303       SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
14304
14305       Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
14306                              DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
14307
14308       if (IsMaskVector) {
14309         // Truncate Result back to a mask vector
14310         Result =
14311             DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
14312                         {Result, DAG.getConstant(0, DL, GatherVT),
14314                          DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
14315       }
14316
14317       if (!VT.isFixedLengthVector())
14318         return Result;
14319       return convertFromScalableVector(VT, Result, DAG, Subtarget);
14320     }
14321
14322     // Just promote the int type to i16 which will double the LMUL.
14323     IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
14324     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
14325   }
14326
  // General path: indices = (EVL - 1) - vid, then a single vrgather.
14327   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
14328   SDValue VecLen =
14329       DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
14330   SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14331                                     DAG.getUNDEF(IndicesVT), VecLen, EVL);
14332   SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
14333                               DAG.getUNDEF(IndicesVT), Mask, EVL);
14334   SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
14335                                DAG.getUNDEF(GatherVT), Mask, EVL);
14336
14337   if (IsMaskVector) {
14338     // Truncate Result back to a mask vector
14339     Result = DAG.getNode(
14340         RISCVISD::SETCC_VL, DL, ContainerVT,
14341         {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
14342          DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
14343   }
14344
14345   if (!VT.isFixedLengthVector())
14346     return Result;
14347   return convertFromScalableVector(VT, Result, DAG, Subtarget);
14348 }
14349
14350SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
14351 SelectionDAG &DAG) const {
14352 MVT VT = Op.getSimpleValueType();
14353 if (VT.getVectorElementType() != MVT::i1)
14354 return lowerVPOp(Op, DAG);
14355
14356 // It is safe to drop mask parameter as masked-off elements are undef.
14357 SDValue Op1 = Op->getOperand(0);
14358 SDValue Op2 = Op->getOperand(1);
14359 SDValue VL = Op->getOperand(3);
14360
14361 MVT ContainerVT = VT;
14362 const bool IsFixed = VT.isFixedLengthVector();
14363 if (IsFixed) {
14364 ContainerVT = getContainerForFixedLengthVector(VT);
14365 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14366 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14367 }
14368
14369 SDLoc DL(Op);
14370 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
14371 if (!IsFixed)
14372 return Val;
14373 return convertFromScalableVector(VT, Val, DAG, Subtarget);
14374}
14375
// Lower VP_STRIDED_LOAD to the riscv_vlse / riscv_vlse_mask intrinsic; the
// unmasked form is used when the mask is a known all-ones splat.
14376 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
14377                                                 SelectionDAG &DAG) const {
14378   SDLoc DL(Op);
14379   MVT XLenVT = Subtarget.getXLenVT();
14380   MVT VT = Op.getSimpleValueType();
14381   MVT ContainerVT = VT;
14382   if (VT.isFixedLengthVector())
14383     ContainerVT = getContainerForFixedLengthVector(VT);
14384
14385   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14386
14387   auto *VPNode = cast<VPStridedLoadSDNode>(Op);
14388   // Check if the mask is known to be all ones
14389   SDValue Mask = VPNode->getMask();
14390   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14391
14392   SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
14393                                                    : Intrinsic::riscv_vlse_mask,
14394                                         DL, XLenVT);
14395   SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
14396                               DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
14397                               VPNode->getStride()};
14398   if (!IsUnmasked) {
14399     if (VT.isFixedLengthVector()) {
14400       MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14401       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14402     }
14403     Ops.push_back(Mask);
14404   }
14405   Ops.push_back(VPNode->getVectorLength());
14406   if (!IsUnmasked) {
    // The masked intrinsic additionally takes a policy operand.
14407     SDValue Policy =
14409     Ops.push_back(Policy);
14410   }
14411
14412   SDValue Result =
14414                               VPNode->getMemoryVT(), VPNode->getMemOperand());
14415   SDValue Chain = Result.getValue(1);
14416
14417   if (VT.isFixedLengthVector())
14418     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14419
14420   return DAG.getMergeValues({Result, Chain}, DL);
14421 }
14422
14423SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
14424 SelectionDAG &DAG) const {
14425 SDLoc DL(Op);
14426 MVT XLenVT = Subtarget.getXLenVT();
14427
14428 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
14429 SDValue StoreVal = VPNode->getValue();
14430 MVT VT = StoreVal.getSimpleValueType();
14431 MVT ContainerVT = VT;
14432 if (VT.isFixedLengthVector()) {
14433 ContainerVT = getContainerForFixedLengthVector(VT);
14434 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
14435 }
14436
14437 // Check if the mask is known to be all ones
14438 SDValue Mask = VPNode->getMask();
14439 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14440
14441 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
14442 : Intrinsic::riscv_vsse_mask,
14443 DL, XLenVT);
14444 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
14445 VPNode->getBasePtr(), VPNode->getStride()};
14446 if (!IsUnmasked) {
14447 if (VT.isFixedLengthVector()) {
14448 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14449 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14450 }
14451 Ops.push_back(Mask);
14452 }
14453 Ops.push_back(VPNode->getVectorLength());
14454
14455 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14456 Ops, VPNode->getMemoryVT(),
14457 VPNode->getMemOperand());
14458}
14459
14460 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14461 // matched to a RVV indexed load. The RVV indexed load instructions only
14462 // support the "unsigned unscaled" addressing mode; indices are implicitly
14463 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
14464 // signed or scaled indexing is extended to the XLEN value type and scaled
14465 // accordingly.
14466 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14467                                                SelectionDAG &DAG) const {
14468   SDLoc DL(Op);
14469   MVT VT = Op.getSimpleValueType();
14470
14471   const auto *MemSD = cast<MemSDNode>(Op.getNode());
14472   EVT MemVT = MemSD->getMemoryVT();
14473   MachineMemOperand *MMO = MemSD->getMemOperand();
14474   SDValue Chain = MemSD->getChain();
14475   SDValue BasePtr = MemSD->getBasePtr();
14476
14477   [[maybe_unused]] ISD::LoadExtType LoadExtType;
14478   SDValue Index, Mask, PassThru, VL;
14479
14480   if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14481     Index = VPGN->getIndex();
14482     Mask = VPGN->getMask();
14483     PassThru = DAG.getUNDEF(VT);
14484     VL = VPGN->getVectorLength();
14485     // VP doesn't support extending loads.
14487   } else {
14488     // Else it must be a MGATHER.
14489     auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14490     Index = MGN->getIndex();
14491     Mask = MGN->getMask();
14492     PassThru = MGN->getPassThru();
14493     LoadExtType = MGN->getExtensionType();
14494   }
14495
14496   MVT IndexVT = Index.getSimpleValueType();
14497   MVT XLenVT = Subtarget.getXLenVT();
14498
14500          "Unexpected VTs!");
14501   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14502   // Targets have to explicitly opt-in for extending vector loads.
14503   assert(LoadExtType == ISD::NON_EXTLOAD &&
14504          "Unexpected extending MGATHER/VP_GATHER");
14505
14506   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14507   // the selection of the masked intrinsics doesn't do this for us.
14508   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14509
14510   MVT ContainerVT = VT;
14511   if (VT.isFixedLengthVector()) {
14512     ContainerVT = getContainerForFixedLengthVector(VT);
14513     IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14514                                ContainerVT.getVectorElementCount());
14515
14516     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14517
14518     if (!IsUnmasked) {
14519       MVT MaskVT = getMaskTypeFor(ContainerVT);
14520       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14521       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14522     }
14523   }
14524
  // MGATHER carries no explicit VL; use the default (VLMAX) for the type.
14525   if (!VL)
14526     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14527
  // On RV32, wider-than-XLEN indices may be truncated: the instruction
  // zero-extends/truncates indices to XLEN anyway (see header comment).
14528   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14529     IndexVT = IndexVT.changeVectorElementType(XLenVT);
14530     Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14531   }
14532
14533   unsigned IntID =
14534       IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14535   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14536   if (IsUnmasked)
14537     Ops.push_back(DAG.getUNDEF(ContainerVT));
14538   else
14539     Ops.push_back(PassThru);
14540   Ops.push_back(BasePtr);
14541   Ops.push_back(Index);
14542   if (!IsUnmasked)
14543     Ops.push_back(Mask);
14544   Ops.push_back(VL);
14545   if (!IsUnmasked)
14546     Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14547
14548   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14549   SDValue Result =
14550       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14551   Chain = Result.getValue(1);
14552
14553   if (VT.isFixedLengthVector())
14554     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14555
14556   return DAG.getMergeValues({Result, Chain}, DL);
14557 }
14558
14559 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14560 // matched to a RVV indexed store. The RVV indexed store instructions only
14561 // support the "unsigned unscaled" addressing mode; indices are implicitly
14562 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
14563 // signed or scaled indexing is extended to the XLEN value type and scaled
14564 // accordingly.
14565 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14566                                                 SelectionDAG &DAG) const {
14567   SDLoc DL(Op);
14568   const auto *MemSD = cast<MemSDNode>(Op.getNode());
14569   EVT MemVT = MemSD->getMemoryVT();
14570   MachineMemOperand *MMO = MemSD->getMemOperand();
14571   SDValue Chain = MemSD->getChain();
14572   SDValue BasePtr = MemSD->getBasePtr();
14573
14574   [[maybe_unused]] bool IsTruncatingStore = false;
14575   SDValue Index, Mask, Val, VL;
14576
14577   if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14578     Index = VPSN->getIndex();
14579     Mask = VPSN->getMask();
14580     Val = VPSN->getValue();
14581     VL = VPSN->getVectorLength();
14582     // VP doesn't support truncating stores.
14583     IsTruncatingStore = false;
14584   } else {
14585     // Else it must be a MSCATTER.
14586     auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14587     Index = MSN->getIndex();
14588     Mask = MSN->getMask();
14589     Val = MSN->getValue();
14590     IsTruncatingStore = MSN->isTruncatingStore();
14591   }
14592
14593   MVT VT = Val.getSimpleValueType();
14594   MVT IndexVT = Index.getSimpleValueType();
14595   MVT XLenVT = Subtarget.getXLenVT();
14596
14598          "Unexpected VTs!");
14599   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14600   // Targets have to explicitly opt-in for extending vector loads and
14601   // truncating vector stores.
14602   assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14603
14604   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14605   // the selection of the masked intrinsics doesn't do this for us.
14606   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14607
14608   MVT ContainerVT = VT;
14609   if (VT.isFixedLengthVector()) {
14610     ContainerVT = getContainerForFixedLengthVector(VT);
14611     IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14612                                ContainerVT.getVectorElementCount());
14613
14614     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14615     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14616
14617     if (!IsUnmasked) {
14618       MVT MaskVT = getMaskTypeFor(ContainerVT);
14619       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14620     }
14621   }
14622
  // MSCATTER carries no explicit VL; use the default (VLMAX) for the type.
14623   if (!VL)
14624     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14625
  // On RV32, wider-than-XLEN indices may be truncated: the instruction
  // zero-extends/truncates indices to XLEN anyway (see header comment).
14626   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14627     IndexVT = IndexVT.changeVectorElementType(XLenVT);
14628     Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14629   }
14630
14631   unsigned IntID =
14632       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14633   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14634   Ops.push_back(Val);
14635   Ops.push_back(BasePtr);
14636   Ops.push_back(Index);
14637   if (!IsUnmasked)
14638     Ops.push_back(Mask);
14639   Ops.push_back(VL);
14640
14642                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14643 }
14644
// Lower GET_ROUNDING (FLT_ROUNDS) by reading the frm CSR and translating the
// RISC-V rounding-mode encoding to the FLT_ROUNDS one via a packed bit table.
14645 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14646                                                SelectionDAG &DAG) const {
14647   const MVT XLenVT = Subtarget.getXLenVT();
14648   SDLoc DL(Op);
14649   SDValue Chain = Op->getOperand(0);
14650   SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14651   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14652   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14653
14654   // Encoding used for rounding mode in RISC-V differs from that used in
14655   // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
14656   // table, which consists of a sequence of 4-bit fields, each representing
14657   // corresponding FLT_ROUNDS mode.
14658   static const int Table =
14664
  // Select the 4-bit field at index frm: shift amount = frm * 4 (frm << 2),
  // then mask to the 3 meaningful bits of the field.
14665   SDValue Shift =
14666       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14667   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14668                                 DAG.getConstant(Table, DL, XLenVT), Shift);
14669   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14670                                DAG.getConstant(7, DL, XLenVT));
14671
14672   return DAG.getMergeValues({Masked, Chain}, DL);
14673 }
14674
// Lower SET_ROUNDING by translating the FLT_ROUNDS encoding to the RISC-V
// frm encoding through a packed bit table, then writing the frm CSR.
14675 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14676                                                SelectionDAG &DAG) const {
14677   const MVT XLenVT = Subtarget.getXLenVT();
14678   SDLoc DL(Op);
14679   SDValue Chain = Op->getOperand(0);
14680   SDValue RMValue = Op->getOperand(1);
14681   SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14682
14683   // Encoding used for rounding mode in RISC-V differs from that used in
14684   // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
14685   // a table, which consists of a sequence of 4-bit fields, each representing
14686   // corresponding RISC-V mode.
14687   static const unsigned Table =
14693
14694   RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14695
  // Select the 4-bit field at index RMValue (shift amount = RMValue * 4).
14696   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14697                               DAG.getConstant(2, DL, XLenVT));
14698   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14699                                 DAG.getConstant(Table, DL, XLenVT), Shift);
14700   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14701                         DAG.getConstant(0x7, DL, XLenVT));
14702   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14703                      RMValue);
14704 }
14705
14706SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14707 SelectionDAG &DAG) const {
14708 const MVT XLenVT = Subtarget.getXLenVT();
14709 SDLoc DL(Op);
14710 SDValue Chain = Op->getOperand(0);
14711 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14712 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14713 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14714}
14715
14716SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14717 SelectionDAG &DAG) const {
14718 const MVT XLenVT = Subtarget.getXLenVT();
14719 SDLoc DL(Op);
14720 SDValue Chain = Op->getOperand(0);
14721 SDValue EnvValue = Op->getOperand(1);
14722 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14723
14724 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14725 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14726 EnvValue);
14727}
14728
14729SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14730 SelectionDAG &DAG) const {
14731 const MVT XLenVT = Subtarget.getXLenVT();
14732 SDLoc DL(Op);
14733 SDValue Chain = Op->getOperand(0);
14734 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14735 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14736
14737 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14738 EnvValue);
14739}
14740
14743
14744SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14745 SelectionDAG &DAG) const {
14746 const MVT XLenVT = Subtarget.getXLenVT();
14747 SDLoc DL(Op);
14748 SDValue Chain = Op->getOperand(0);
14749 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14750 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14751 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14752 Chain = Result.getValue(1);
14753 return DAG.getMergeValues({Result, Chain}, DL);
14754}
14755
14756SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14757 SelectionDAG &DAG) const {
14758 const MVT XLenVT = Subtarget.getXLenVT();
14759 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14760 SDLoc DL(Op);
14761 SDValue Chain = Op->getOperand(0);
14762 SDValue EnvValue = Op->getOperand(1);
14763 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14764 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14765
14766 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14767 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14768 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14769 ModeMask);
14770 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14771 EnvValue);
14772}
14773
14774SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14775 SelectionDAG &DAG) const {
14776 const MVT XLenVT = Subtarget.getXLenVT();
14777 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14778 SDLoc DL(Op);
14779 SDValue Chain = Op->getOperand(0);
14780 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14781 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14782
14783 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14784 ModeMask);
14785}
14786
14787SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14788 SelectionDAG &DAG) const {
14789 MachineFunction &MF = DAG.getMachineFunction();
14790
14791 bool isRISCV64 = Subtarget.is64Bit();
14792 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14793
14794 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14795 return DAG.getFrameIndex(FI, PtrVT);
14796}
14797
14798// Returns the opcode of the target-specific SDNode that implements the 32-bit
14799// form of the given Opcode.
14800static unsigned getRISCVWOpcode(unsigned Opcode) {
14801 switch (Opcode) {
14802 default:
14803 llvm_unreachable("Unexpected opcode");
14804 case ISD::SHL:
14805 return RISCVISD::SLLW;
14806 case ISD::SRA:
14807 return RISCVISD::SRAW;
14808 case ISD::SRL:
14809 return RISCVISD::SRLW;
14810 case ISD::SDIV:
14811 return RISCVISD::DIVW;
14812 case ISD::UDIV:
14813 return RISCVISD::DIVUW;
14814 case ISD::UREM:
14815 return RISCVISD::REMUW;
14816 case ISD::ROTL:
14817 return RISCVISD::ROLW;
14818 case ISD::ROTR:
14819 return RISCVISD::RORW;
14820 }
14821}
14822
14823// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14824// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14825// otherwise be promoted to i64, making it difficult to select the
14826// SLLW/DIVUW/.../*W later on because the fact that the operation was
14827// originally of type i8/i16/i32 is lost.
14829 unsigned ExtOpc = ISD::ANY_EXTEND) {
14830 SDLoc DL(N);
14831 unsigned WOpcode = getRISCVWOpcode(N->getOpcode()); // Map to SLLW/DIVW/... form.
14832 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); // Promote LHS to i64.
14833 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); // Promote RHS to i64.
14834 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); // Emit the *W node on i64.
14835 // ReplaceNodeResults requires we maintain the same type for the return value.
14836 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14837}
14838
14839// Converts the given 32-bit operation to an i64 operation with sign-extension
14840// semantics to reduce the number of sign-extension instructions.
14842 SDLoc DL(N);
14843 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); // Upper 32 bits are don't-care.
14844 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); // Upper 32 bits are don't-care.
14845 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); // Perform the operation in i64.
14846 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, // Sign-extend from bit 31, matching
14847 DAG.getValueType(MVT::i32)); // the W-instruction result convention.
14848 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14849}
14850
14853 SelectionDAG &DAG) const {
14854 SDLoc DL(N);
14855 switch (N->getOpcode()) {
14856 default:
14857 llvm_unreachable("Don't know how to custom type legalize this operation!");
14860 case ISD::FP_TO_SINT:
14861 case ISD::FP_TO_UINT: {
14862 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14863 "Unexpected custom legalisation");
14864 bool IsStrict = N->isStrictFPOpcode();
14865 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14866 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14867 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14868 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14870 if (!isTypeLegal(Op0.getValueType()))
14871 return;
14872 if (IsStrict) {
14873 SDValue Chain = N->getOperand(0);
14874 // In absence of Zfh, promote f16 to f32, then convert.
14875 if (Op0.getValueType() == MVT::f16 &&
14876 !Subtarget.hasStdExtZfhOrZhinx()) {
14877 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14878 {Chain, Op0});
14879 Chain = Op0.getValue(1);
14880 }
14881 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14882 : RISCVISD::STRICT_FCVT_WU_RV64;
14883 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14884 SDValue Res = DAG.getNode(
14885 Opc, DL, VTs, Chain, Op0,
14886 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14887 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14888 Results.push_back(Res.getValue(1));
14889 return;
14890 }
14891 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14892 // convert.
14893 if ((Op0.getValueType() == MVT::f16 &&
14894 !Subtarget.hasStdExtZfhOrZhinx()) ||
14895 Op0.getValueType() == MVT::bf16)
14896 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14897
14898 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14899 SDValue Res =
14900 DAG.getNode(Opc, DL, MVT::i64, Op0,
14901 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14902 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14903 return;
14904 }
14905 // If the FP type needs to be softened, emit a library call using the 'si'
14906 // version. If we left it to default legalization we'd end up with 'di'. If
14907 // the FP type doesn't need to be softened just let generic type
14908 // legalization promote the result type.
14909 RTLIB::Libcall LC;
14910 if (IsSigned)
14911 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14912 else
14913 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14914 MakeLibCallOptions CallOptions;
14915 EVT OpVT = Op0.getValueType();
14916 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14917 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14918 SDValue Result;
14919 std::tie(Result, Chain) =
14920 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14921 Results.push_back(Result);
14922 if (IsStrict)
14923 Results.push_back(Chain);
14924 break;
14925 }
14926 case ISD::LROUND: {
14927 SDValue Op0 = N->getOperand(0);
14928 EVT Op0VT = Op0.getValueType();
14929 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14931 if (!isTypeLegal(Op0VT))
14932 return;
14933
14934 // In absence of Zfh, promote f16 to f32, then convert.
14935 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14936 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14937
14938 SDValue Res =
14939 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14940 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14941 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14942 return;
14943 }
14944 // If the FP type needs to be softened, emit a library call to lround. We'll
14945 // need to truncate the result. We assume any value that doesn't fit in i32
14946 // is allowed to return an unspecified value.
14947 RTLIB::Libcall LC =
14948 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14949 MakeLibCallOptions CallOptions;
14950 EVT OpVT = Op0.getValueType();
14951 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14952 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14953 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14954 Results.push_back(Result);
14955 break;
14956 }
14959 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14960 "has custom type legalization on riscv32");
14961
14962 SDValue LoCounter, HiCounter;
14963 MVT XLenVT = Subtarget.getXLenVT();
14964 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14965 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14966 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14967 } else {
14968 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14969 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14970 }
14971 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14972 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14973 N->getOperand(0), LoCounter, HiCounter);
14974
14975 Results.push_back(
14976 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14977 Results.push_back(RCW.getValue(2));
14978 break;
14979 }
14980 case ISD::LOAD: {
14981 if (!ISD::isNON_EXTLoad(N))
14982 return;
14983
14984 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14985 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14987
14988 if (N->getValueType(0) == MVT::i64) {
14989 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14990 "Unexpected custom legalisation");
14991
14992 if (Ld->getAlign() < Subtarget.getZilsdAlign())
14993 return;
14994
14995 SDLoc DL(N);
14996 SDValue Result = DAG.getMemIntrinsicNode(
14997 RISCVISD::LD_RV32, DL,
14998 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14999 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
15000 SDValue Lo = Result.getValue(0);
15001 SDValue Hi = Result.getValue(1);
15002 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
15003 Results.append({Pair, Result.getValue(2)});
15004 return;
15005 }
15006
15007 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15008 "Unexpected custom legalisation");
15009
15010 SDLoc dl(N);
15011 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
15012 Ld->getBasePtr(), Ld->getMemoryVT(),
15013 Ld->getMemOperand());
15014 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
15015 Results.push_back(Res.getValue(1));
15016 return;
15017 }
15018 case ISD::MUL: {
15019 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
15020 unsigned XLen = Subtarget.getXLen();
15021 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
15022 if (Size > XLen) {
15023 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
15024 SDValue LHS = N->getOperand(0);
15025 SDValue RHS = N->getOperand(1);
15026 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
15027
15028 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
15029 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
15030 // We need exactly one side to be unsigned.
15031 if (LHSIsU == RHSIsU)
15032 return;
15033
15034 auto MakeMULPair = [&](SDValue S, SDValue U) {
15035 MVT XLenVT = Subtarget.getXLenVT();
15036 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
15037 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
15038 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
15039 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
15040 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
15041 };
15042
15043 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
15044 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
15045
15046 // The other operand should be signed, but still prefer MULH when
15047 // possible.
15048 if (RHSIsU && LHSIsS && !RHSIsS)
15049 Results.push_back(MakeMULPair(LHS, RHS));
15050 else if (LHSIsU && RHSIsS && !LHSIsS)
15051 Results.push_back(MakeMULPair(RHS, LHS));
15052
15053 return;
15054 }
15055 [[fallthrough]];
15056 }
15057 case ISD::ADD:
15058 case ISD::SUB:
15059 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15060 "Unexpected custom legalisation");
15061 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
15062 break;
15063 case ISD::SHL:
15064 case ISD::SRA:
15065 case ISD::SRL:
15066 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15067 "Unexpected custom legalisation");
15068 if (N->getOperand(1).getOpcode() != ISD::Constant) {
15069 // If we can use a BSET instruction, allow default promotion to apply.
15070 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
15071 isOneConstant(N->getOperand(0)))
15072 break;
15073 Results.push_back(customLegalizeToWOp(N, DAG));
15074 break;
15075 }
15076
15077 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
15078 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
15079 // shift amount.
15080 if (N->getOpcode() == ISD::SHL) {
15081 SDLoc DL(N);
15082 SDValue NewOp0 =
15083 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15084 SDValue NewOp1 =
15085 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
15086 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
15087 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
15088 DAG.getValueType(MVT::i32));
15089 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
15090 }
15091
15092 break;
15093 case ISD::ROTL:
15094 case ISD::ROTR:
15095 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15096 "Unexpected custom legalisation");
15097 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
15098 Subtarget.hasVendorXTHeadBb()) &&
15099 "Unexpected custom legalization");
15100 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
15101 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
15102 return;
15103 Results.push_back(customLegalizeToWOp(N, DAG));
15104 break;
15105 case ISD::CTTZ:
15107 case ISD::CTLZ:
15109 case ISD::CTLS: {
15110 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15111 "Unexpected custom legalisation");
15112
15113 SDValue NewOp0 =
15114 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15115 unsigned Opc;
15116 switch (N->getOpcode()) {
15117 default: llvm_unreachable("Unexpected opcode");
15118 case ISD::CTTZ:
15120 Opc = RISCVISD::CTZW;
15121 break;
15122 case ISD::CTLZ:
15124 Opc = RISCVISD::CLZW;
15125 break;
15126 case ISD::CTLS:
15127 Opc = RISCVISD::CLSW;
15128 break;
15129 }
15130
15131 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
15132 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15133 return;
15134 }
15135 case ISD::SDIV:
15136 case ISD::UDIV:
15137 case ISD::UREM: {
15138 MVT VT = N->getSimpleValueType(0);
15139 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
15140 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
15141 "Unexpected custom legalisation");
15142 // Don't promote division/remainder by constant since we should expand those
15143 // to multiply by magic constant.
15144 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
15145 if (N->getOperand(1).getOpcode() == ISD::Constant &&
15146 !isIntDivCheap(N->getValueType(0), Attr))
15147 return;
15148
15149 // If the input is i32, use ANY_EXTEND since the W instructions don't read
15150 // the upper 32 bits. For other types we need to sign or zero extend
15151 // based on the opcode.
15152 unsigned ExtOpc = ISD::ANY_EXTEND;
15153 if (VT != MVT::i32)
15154 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
15156
15157 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
15158 break;
15159 }
15160 case ISD::SADDO:
15161 case ISD::SSUBO: {
15162 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15163 "Unexpected custom legalisation");
15164
15165 // This is similar to the default legalization, but we return the
15166 // sext_inreg instead of the add/sub.
15167 bool IsAdd = N->getOpcode() == ISD::SADDO;
15168 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
15169 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
15170 SDValue Op =
15171 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
15172 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Op,
15173 DAG.getValueType(MVT::i32));
15174
15175 SDValue Overflow;
15176
15177 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
15178 // use the default legalization.
15179 if (IsAdd && isa<ConstantSDNode>(N->getOperand(1))) {
15180 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
15181
15182 // For an addition, the result should be less than one of the operands
15183 // (LHS) if and only if the other operand (RHS) is negative, otherwise
15184 // there will be overflow.
15185 EVT OType = N->getValueType(1);
15186 SDValue ResultLowerThanLHS =
15187 DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
15188 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
15189
15190 Overflow =
15191 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
15192 } else {
15193 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, Op, ISD::SETNE);
15194 }
15195
15196 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15197 Results.push_back(Overflow);
15198 return;
15199 }
15200 case ISD::UADDO:
15201 case ISD::USUBO: {
15202 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15203 "Unexpected custom legalisation");
15204 bool IsAdd = N->getOpcode() == ISD::UADDO;
15205 // Create an ADDW or SUBW.
15206 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15207 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15208 SDValue Res =
15209 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
15210 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
15211 DAG.getValueType(MVT::i32));
15212
15213 SDValue Overflow;
15214 if (IsAdd && isOneConstant(RHS)) {
15215 // Special case uaddo X, 1 overflowed if the addition result is 0.
15216 // The general case (X + C) < C is not necessarily beneficial. Although we
15217 // reduce the live range of X, we may introduce the materialization of
15218 // constant C, especially when the setcc result is used by branch. We have
15219 // no compare with constant and branch instructions.
15220 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
15221 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
15222 } else if (IsAdd && isAllOnesConstant(RHS)) {
15223 // Special case uaddo X, -1 overflowed if X != 0.
15224 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
15225 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
15226 } else {
15227 // Sign extend the LHS and perform an unsigned compare with the ADDW
15228 // result. Since the inputs are sign extended from i32, this is equivalent
15229 // to comparing the lower 32 bits.
15230 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
15231 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
15232 IsAdd ? ISD::SETULT : ISD::SETUGT);
15233 }
15234
15235 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15236 Results.push_back(Overflow);
15237 return;
15238 }
15239 case ISD::UADDSAT:
15240 case ISD::USUBSAT: {
15241 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15242 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
15243 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
15244 // promotion for UADDO/USUBO.
15245 Results.push_back(expandAddSubSat(N, DAG));
15246 return;
15247 }
15248 case ISD::SADDSAT:
15249 case ISD::SSUBSAT: {
15250 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15251 "Unexpected custom legalisation");
15252 Results.push_back(expandAddSubSat(N, DAG));
15253 return;
15254 }
15255 case ISD::ABS: {
15256 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15257 "Unexpected custom legalisation");
15258
15259 if (Subtarget.hasStdExtP()) {
15260 SDValue Src =
15261 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15262 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
15263 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
15264 return;
15265 }
15266
15267 if (Subtarget.hasStdExtZbb()) {
15268 // Emit a special node that will be expanded to NEGW+MAX at isel.
15269 // This allows us to remember that the result is sign extended. Expanding
15270 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
15271 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
15272 N->getOperand(0));
15273 SDValue Abs = DAG.getNode(RISCVISD::NEGW_MAX, DL, MVT::i64, Src);
15274 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
15275 return;
15276 }
15277
15278 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
15279 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15280
15281 // Freeze the source so we can increase it's use count.
15282 Src = DAG.getFreeze(Src);
15283
15284 // Copy sign bit to all bits using the sraiw pattern.
15285 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
15286 DAG.getValueType(MVT::i32));
15287 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
15288 DAG.getConstant(31, DL, MVT::i64));
15289
15290 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
15291 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
15292
15293 // NOTE: The result is only required to be anyextended, but sext is
15294 // consistent with type legalization of sub.
15295 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
15296 DAG.getValueType(MVT::i32));
15297 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
15298 return;
15299 }
15300 case ISD::BITCAST: {
15301 EVT VT = N->getValueType(0);
15302 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
15303 SDValue Op0 = N->getOperand(0);
15304 EVT Op0VT = Op0.getValueType();
15305 MVT XLenVT = Subtarget.getXLenVT();
15306 if (VT == MVT::i16 &&
15307 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
15308 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
15309 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
15310 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
15311 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
15312 Subtarget.hasStdExtFOrZfinx()) {
15313 SDValue FPConv =
15314 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
15315 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
15316 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
15317 Subtarget.hasStdExtDOrZdinx()) {
15318 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
15319 DAG.getVTList(MVT::i32, MVT::i32), Op0);
15320 SDValue Lo = NewReg.getValue(0);
15321 SDValue Hi = NewReg.getValue(1);
15322 // For big-endian, swap the order when building the i64 pair.
15323 if (!Subtarget.isLittleEndian())
15324 std::swap(Lo, Hi);
15325 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
15326 Results.push_back(RetReg);
15327 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
15328 isTypeLegal(Op0VT)) {
15329 // Custom-legalize bitcasts from fixed-length vector types to illegal
15330 // scalar types in order to improve codegen. Bitcast the vector to a
15331 // one-element vector type whose element type is the same as the result
15332 // type, and extract the first element.
15333 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
15334 if (isTypeLegal(BVT)) {
15335 SDValue BVec = DAG.getBitcast(BVT, Op0);
15336 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
15337 }
15338 }
15339 break;
15340 }
15341 case ISD::BITREVERSE: {
15342 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
15343 "Unexpected custom legalisation");
15344 MVT XLenVT = Subtarget.getXLenVT();
15345 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
15346 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
15347 // ReplaceNodeResults requires we maintain the same type for the return
15348 // value.
15349 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
15350 break;
15351 }
15352 case RISCVISD::BREV8:
15353 case RISCVISD::ORC_B: {
15354 MVT VT = N->getSimpleValueType(0);
15355 MVT XLenVT = Subtarget.getXLenVT();
15356 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
15357 "Unexpected custom legalisation");
15358 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
15359 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
15360 "Unexpected extension");
15361 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
15362 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
15363 // ReplaceNodeResults requires we maintain the same type for the return
15364 // value.
15365 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
15366 break;
15367 }
15368 case RISCVISD::PASUB:
15369 case RISCVISD::PASUBU:
15370 case RISCVISD::PMULHSU:
15371 case RISCVISD::PMULHR:
15372 case RISCVISD::PMULHRU:
15373 case RISCVISD::PMULHRSU: {
15374 MVT VT = N->getSimpleValueType(0);
15375 SDValue Op0 = N->getOperand(0);
15376 SDValue Op1 = N->getOperand(1);
15377 unsigned Opcode = N->getOpcode();
15378 // PMULH* variants don't support i8
15379 [[maybe_unused]] bool IsMulH =
15380 Opcode == RISCVISD::PMULHSU || Opcode == RISCVISD::PMULHR ||
15381 Opcode == RISCVISD::PMULHRU || Opcode == RISCVISD::PMULHRSU;
15382 assert(VT == MVT::v2i16 || (!IsMulH && VT == MVT::v4i8));
15383 MVT NewVT = MVT::v4i16;
15384 if (VT == MVT::v4i8)
15385 NewVT = MVT::v8i8;
15386 SDValue Undef = DAG.getUNDEF(VT);
15387 Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op0, Undef});
15388 Op1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op1, Undef});
15389 Results.push_back(DAG.getNode(Opcode, DL, NewVT, {Op0, Op1}));
15390 return;
15391 }
15393 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
15394 // type is illegal (currently only vXi64 RV32).
15395 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
15396 // transferred to the destination register. We issue two of these from the
15397 // upper- and lower- halves of the SEW-bit vector element, slid down to the
15398 // first element.
15399 SDValue Vec = N->getOperand(0);
15400 SDValue Idx = N->getOperand(1);
15401
15402 // The vector type hasn't been legalized yet so we can't issue target
15403 // specific nodes if it needs legalization.
15404 // FIXME: We would manually legalize if it's important.
15405 if (!isTypeLegal(Vec.getValueType()))
15406 return;
15407
15408 MVT VecVT = Vec.getSimpleValueType();
15409
15410 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
15411 VecVT.getVectorElementType() == MVT::i64 &&
15412 "Unexpected EXTRACT_VECTOR_ELT legalization");
15413
15414 // If this is a fixed vector, we need to convert it to a scalable vector.
15415 MVT ContainerVT = VecVT;
15416 if (VecVT.isFixedLengthVector()) {
15417 ContainerVT = getContainerForFixedLengthVector(VecVT);
15418 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
15419 }
15420
15421 MVT XLenVT = Subtarget.getXLenVT();
15422
15423 // Use a VL of 1 to avoid processing more elements than we need.
15424 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
15425
15426 // Unless the index is known to be 0, we must slide the vector down to get
15427 // the desired element into index 0.
15428 if (!isNullConstant(Idx)) {
15429 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
15430 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
15431 }
15432
15433 // Extract the lower XLEN bits of the correct vector element.
15434 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15435
15436 // To extract the upper XLEN bits of the vector element, shift the first
15437 // element right by 32 bits and re-extract the lower XLEN bits.
15438 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
15439 DAG.getUNDEF(ContainerVT),
15440 DAG.getConstant(32, DL, XLenVT), VL);
15441 SDValue LShr32 =
15442 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
15443 DAG.getUNDEF(ContainerVT), Mask, VL);
15444
15445 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15446
15447 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15448 break;
15449 }
15451 unsigned IntNo = N->getConstantOperandVal(0);
15452 switch (IntNo) {
15453 default:
15455 "Don't know how to custom type legalize this intrinsic!");
15456 case Intrinsic::experimental_get_vector_length: {
15457 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
15458 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15459 return;
15460 }
15461 case Intrinsic::experimental_cttz_elts: {
15462 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
15463 Results.push_back(DAG.getZExtOrTrunc(Res, DL, N->getValueType(0)));
15464 return;
15465 }
15466 case Intrinsic::riscv_orc_b:
15467 case Intrinsic::riscv_brev8:
15468 case Intrinsic::riscv_sha256sig0:
15469 case Intrinsic::riscv_sha256sig1:
15470 case Intrinsic::riscv_sha256sum0:
15471 case Intrinsic::riscv_sha256sum1:
15472 case Intrinsic::riscv_sm3p0:
15473 case Intrinsic::riscv_sm3p1: {
15474 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15475 return;
15476 unsigned Opc;
15477 switch (IntNo) {
15478 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
15479 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
15480 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
15481 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
15482 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
15483 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
15484 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
15485 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
15486 }
15487
15488 SDValue NewOp =
15489 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15490 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
15491 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15492 return;
15493 }
15494 case Intrinsic::riscv_sm4ks:
15495 case Intrinsic::riscv_sm4ed: {
15496 unsigned Opc =
15497 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
15498 SDValue NewOp0 =
15499 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15500 SDValue NewOp1 =
15501 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15502 SDValue Res =
15503 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
15504 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15505 return;
15506 }
15507 case Intrinsic::riscv_mopr: {
15508 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15509 return;
15510 SDValue NewOp =
15511 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15512 SDValue Res = DAG.getNode(
15513 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15514 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15515 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15516 return;
15517 }
15518 case Intrinsic::riscv_moprr: {
15519 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15520 return;
15521 SDValue NewOp0 =
15522 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15523 SDValue NewOp1 =
15524 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15525 SDValue Res = DAG.getNode(
15526 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15527 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15528 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15529 return;
15530 }
15531 case Intrinsic::riscv_clmul: {
15532 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15533 return;
15534
15535 SDValue NewOp0 =
15536 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15537 SDValue NewOp1 =
15538 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15539 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15540 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15541 return;
15542 }
15543 case Intrinsic::riscv_clmulh:
15544 case Intrinsic::riscv_clmulr: {
15545 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15546 return;
15547
15548 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15549 // to the full 128-bit clmul result of multiplying two xlen values.
15550 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15551 // upper 32 bits.
15552 //
15553 // The alternative is to mask the inputs to 32 bits and use clmul, but
15554 // that requires two shifts to mask each input without zext.w.
15555 // FIXME: If the inputs are known zero extended or could be freely
15556 // zero extended, the mask form would be better.
15557 SDValue NewOp0 =
15558 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15559 SDValue NewOp1 =
15560 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15561 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15562 DAG.getConstant(32, DL, MVT::i64));
15563 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15564 DAG.getConstant(32, DL, MVT::i64));
15565 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15566 : RISCVISD::CLMULR;
15567 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15568 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15569 DAG.getConstant(32, DL, MVT::i64));
15570 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15571 return;
15572 }
15573 case Intrinsic::riscv_vmv_x_s: {
15574 EVT VT = N->getValueType(0);
15575 MVT XLenVT = Subtarget.getXLenVT();
15576 if (VT.bitsLT(XLenVT)) {
15577 // Simple case just extract using vmv.x.s and truncate.
15578 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15579 Subtarget.getXLenVT(), N->getOperand(1));
15580 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15581 return;
15582 }
15583
15584 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15585 "Unexpected custom legalization");
15586
15587 // We need to do the move in two steps.
15588 SDValue Vec = N->getOperand(1);
15589 MVT VecVT = Vec.getSimpleValueType();
15590
15591 // First extract the lower XLEN bits of the element.
15592 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15593
15594 // To extract the upper XLEN bits of the vector element, shift the first
15595 // element right by 32 bits and re-extract the lower XLEN bits.
15596 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15597
15598 SDValue ThirtyTwoV =
15599 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15600 DAG.getConstant(32, DL, XLenVT), VL);
15601 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15602 DAG.getUNDEF(VecVT), Mask, VL);
15603 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15604
15605 Results.push_back(
15606 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15607 break;
15608 }
15609 }
15610 break;
15611 }
15612 case ISD::VECREDUCE_ADD:
15613 case ISD::VECREDUCE_AND:
15614 case ISD::VECREDUCE_OR:
15615 case ISD::VECREDUCE_XOR:
15620 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15621 Results.push_back(V);
15622 break;
15623 case ISD::VP_REDUCE_ADD:
15624 case ISD::VP_REDUCE_AND:
15625 case ISD::VP_REDUCE_OR:
15626 case ISD::VP_REDUCE_XOR:
15627 case ISD::VP_REDUCE_SMAX:
15628 case ISD::VP_REDUCE_UMAX:
15629 case ISD::VP_REDUCE_SMIN:
15630 case ISD::VP_REDUCE_UMIN:
15631 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15632 Results.push_back(V);
15633 break;
15634 case ISD::GET_ROUNDING: {
15635 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15636 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15637 Results.push_back(Res.getValue(0));
15638 Results.push_back(Res.getValue(1));
15639 break;
15640 }
15641 }
15642}
15643
15644/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15645/// which corresponds to it.
15646static unsigned getVecReduceOpcode(unsigned Opc) {
15647 switch (Opc) {
15648 default:
15649 llvm_unreachable("Unhandled binary to transform reduction");
15650 case ISD::ADD:
15651 return ISD::VECREDUCE_ADD;
15652 case ISD::UMAX:
15653 return ISD::VECREDUCE_UMAX;
15654 case ISD::SMAX:
15655 return ISD::VECREDUCE_SMAX;
15656 case ISD::UMIN:
15657 return ISD::VECREDUCE_UMIN;
15658 case ISD::SMIN:
15659 return ISD::VECREDUCE_SMIN;
15660 case ISD::AND:
15661 return ISD::VECREDUCE_AND;
15662 case ISD::OR:
15663 return ISD::VECREDUCE_OR;
15664 case ISD::XOR:
15665 return ISD::VECREDUCE_XOR;
15666 case ISD::FADD:
15667 // Note: This is the associative form of the generic reduction opcode.
15668 return ISD::VECREDUCE_FADD;
15669 case ISD::FMAXNUM:
15670 return ISD::VECREDUCE_FMAX;
15671 case ISD::FMINNUM:
15672 return ISD::VECREDUCE_FMIN;
15673 }
15674}
15675
/// Perform two related transforms whose purpose is to incrementally recognize
/// an explode_vector followed by scalar reduction as a vector reduction node.
/// This exists to recover from a deficiency in SLP which can't handle
/// forests with multiple roots sharing common nodes. In some cases, one
/// of the trees will be vectorized, and the other will remain (unprofitably)
/// scalarized.
static SDValue
combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {

  // This transforms need to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd sized vector types.
  if (DAG.NewNodesMustHaveLegalTypes)
    return SDValue();

  // Without V, this transform isn't useful. We could form the (illegal)
  // operations and let them be scalarized again, but there's really no point.
  if (!Subtarget.hasVInstructions())
    return SDValue();

  const SDLoc DL(N);
  const EVT VT = N->getValueType(0);
  const unsigned Opc = N->getOpcode();

  // Restrict FP binops to the reassociable/min-max cases we know map onto a
  // reduction opcode (see getVecReduceOpcode).
  if (!VT.isInteger()) {
    switch (Opc) {
    default:
      return SDValue();
    case ISD::FADD:
      // For FADD, we only handle the case with reassociation allowed. We
      // could handle strict reduction order, but at the moment, there's no
      // known reason to, and the complexity isn't worth it.
      if (!N->getFlags().hasAllowReassociation())
        return SDValue();
      break;
    case ISD::FMAXNUM:
    case ISD::FMINNUM:
      break;
    }
  }

  const unsigned ReduceOpc = getVecReduceOpcode(Opc);
  assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
         "Inconsistent mappings");
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (!LHS.hasOneUse() || !RHS.hasOneUse())
    return SDValue();

  // Canonicalize so that (if present) the extract_vector_elt is on the RHS.
  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    std::swap(LHS, RHS);

  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(RHS.getOperand(1)))
    return SDValue();

  uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
  SDValue SrcVec = RHS.getOperand(0);
  EVT SrcVecVT = SrcVec.getValueType();
  assert(SrcVecVT.getVectorElementType() == VT);
  if (SrcVecVT.isScalableVector())
    return SDValue();

  // Elements wider than ELEN can't be put in a vector register.
  if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
    return SDValue();

  // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
  // reduce_op (extract_subvector [2 x VT] from V). This will form the
  // root of our reduction tree. TODO: We could extend this to any two
  // adjacent aligned constant indices if desired.
  if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t LHSIdx =
        cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
    if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
      EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
      SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
      return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
    }
  }

  // Match (binop (reduce (extract_subvector V, 0),
  //                      (extract_vector_elt V, sizeof(SubVec))))
  // into a reduction of one more element from the original vector V.
  if (LHS.getOpcode() != ReduceOpc)
    return SDValue();

  SDValue ReduceVec = LHS.getOperand(0);
  if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
      isNullConstant(ReduceVec.getOperand(1)) &&
      ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
    // For illegal types (e.g. 3xi32), most will be combined again into a
    // wider (hopefully legal) type. If this is a terminal state, we are
    // relying on type legalization here to produce something reasonable
    // and this lowering quality could probably be improved. (TODO)
    EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
    SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
    // Intersect the fast-math flags: the grown reduction is only as
    // permissive as both the old reduction and the binop allowed.
    return DAG.getNode(ReduceOpc, DL, VT, Vec,
                       ReduceVec->getFlags() & N->getFlags());
  }

  return SDValue();
}
15782
15783
// Try to fold (<bop> x, (reduction.<bop> vec, start))
// by folding x into the reduction's start value, so the scalar binop
// disappears entirely.
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  // Map a scalar binop opcode to the corresponding RVV VL reduction node.
  auto BinOpToRVVReduce = [](unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary to transform reduction");
    case ISD::ADD:
      return RISCVISD::VECREDUCE_ADD_VL;
    case ISD::UMAX:
      return RISCVISD::VECREDUCE_UMAX_VL;
    case ISD::SMAX:
      return RISCVISD::VECREDUCE_SMAX_VL;
    case ISD::UMIN:
      return RISCVISD::VECREDUCE_UMIN_VL;
    case ISD::SMIN:
      return RISCVISD::VECREDUCE_SMIN_VL;
    case ISD::AND:
      return RISCVISD::VECREDUCE_AND_VL;
    case ISD::OR:
      return RISCVISD::VECREDUCE_OR_VL;
    case ISD::XOR:
      return RISCVISD::VECREDUCE_XOR_VL;
    case ISD::FADD:
      return RISCVISD::VECREDUCE_FADD_VL;
    case ISD::FMAXNUM:
      return RISCVISD::VECREDUCE_FMAX_VL;
    case ISD::FMINNUM:
      return RISCVISD::VECREDUCE_FMIN_VL;
    }
  };

  // A matching operand is an extract of lane 0 from the reduction node that
  // corresponds to this binop.
  auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
    return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           isNullConstant(V.getOperand(1)) &&
           V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
  };

  unsigned Opc = N->getOpcode();
  unsigned ReduceIdx;
  if (IsReduction(N->getOperand(0), Opc))
    ReduceIdx = 0;
  else if (IsReduction(N->getOperand(1), Opc))
    ReduceIdx = 1;
  else
    return SDValue();

  // Skip if FADD disallows reassociation but the combiner needs.
  if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
    return SDValue();

  SDValue Extract = N->getOperand(ReduceIdx);
  SDValue Reduce = Extract.getOperand(0);
  if (!Extract.hasOneUse() || !Reduce.hasOneUse())
    return SDValue();

  // Operand 2 of the VL reduction holds the start value vector.
  SDValue ScalarV = Reduce.getOperand(2);
  EVT ScalarVT = ScalarV.getValueType();
  // Look through an insert into an undef vector at index 0.
  if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
      ScalarV.getOperand(0)->isUndef() &&
      isNullConstant(ScalarV.getOperand(2)))
    ScalarV = ScalarV.getOperand(1);

  // Make sure that ScalarV is a splat with VL=1.
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
    return SDValue();

  if (!isNonZeroAVL(ScalarV.getOperand(2)))
    return SDValue();

  // Check the scalar of ScalarV is neutral element
  // TODO: Deal with value other than neutral element.
  if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
                         0))
    return SDValue();

  // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
  // FIXME: We might be able to improve this if operand 0 is undef.
  if (!isNonZeroAVL(Reduce.getOperand(5)))
    return SDValue();

  // The other binop operand becomes the new start value of the reduction.
  SDValue NewStart = N->getOperand(1 - ReduceIdx);

  SDLoc DL(N);
  SDValue NewScalarV =
      lowerScalarInsert(NewStart, ScalarV.getOperand(2),
                        ScalarV.getSimpleValueType(), DL, DAG, Subtarget);

  // If we looked through an INSERT_SUBVECTOR we need to restore it.
  if (ScalarVT != ScalarV.getValueType())
    NewScalarV =
        DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);

  // Rebuild the reduction with the new start vector spliced into operand 2.
  SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
                   NewScalarV,           Reduce.getOperand(3),
                   Reduce.getOperand(4), Reduce.getOperand(5)};
  SDValue NewReduce =
      DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
  return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
                     Extract.getOperand(1));
}
15887
15888// Optimize (add (shl x, c0), (shl y, c1)) ->
15889// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
15890// or
15891// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
15893 const RISCVSubtarget &Subtarget) {
15894 // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15895 // extension.
15896 if (!Subtarget.hasShlAdd(3))
15897 return SDValue();
15898
15899 // Skip for vector types and larger types.
15900 EVT VT = N->getValueType(0);
15901 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15902 return SDValue();
15903
15904 // The two operand nodes must be SHL and have no other use.
15905 SDValue N0 = N->getOperand(0);
15906 SDValue N1 = N->getOperand(1);
15907 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15908 !N0->hasOneUse() || !N1->hasOneUse())
15909 return SDValue();
15910
15911 // Check c0 and c1.
15912 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15913 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15914 if (!N0C || !N1C)
15915 return SDValue();
15916 int64_t C0 = N0C->getSExtValue();
15917 int64_t C1 = N1C->getSExtValue();
15918 if (C0 <= 0 || C1 <= 0)
15919 return SDValue();
15920
15921 int64_t Diff = std::abs(C0 - C1);
15922 if (!Subtarget.hasShlAdd(Diff))
15923 return SDValue();
15924
15925 // Build nodes.
15926 SDLoc DL(N);
15927 int64_t Bits = std::min(C0, C1);
15928 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15929 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15930 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15931 DAG.getTargetConstant(Diff, DL, VT), NS);
15932 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15933}
15934
// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
// or 3.
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
                                     SelectionDAG &DAG) {
  using namespace llvm::SDPatternMatch;

  // Looking for a reg-reg add and not an addi.
  if (isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();

  // Based on testing it seems that performance degrades if the ADDI has
  // more than 2 uses.
  if (AddI->use_size() > 2)
    return SDValue();

  // AddI must be (add SHLVal, constant).
  APInt AddVal;
  SDValue SHLVal;
  if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
    return SDValue();

  // ...and its non-constant operand a single-use (shl x, constant).
  APInt VShift;
  if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
    return SDValue();

  // Only shift amounts 1..3 map onto SH1ADD/SH2ADD/SH3ADD.
  if (VShift.slt(1) || VShift.sgt(3))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  // The shift must be positive but the add can be signed.
  uint64_t ShlConst = VShift.getZExtValue();
  int64_t AddConst = AddVal.getSExtValue();

  // Emit (add (SH*ADD x, Other), AddConst): the sh*add absorbs the shift and
  // the reg-reg add, and the immediate is re-applied as a trailing ADDI.
  SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
                              DAG.getTargetConstant(ShlConst, DL, VT), Other);
  return DAG.getNode(ISD::ADD, DL, VT, SHADD,
                     DAG.getSignedConstant(AddConst, DL, VT));
}
15973
15974// Optimize (add (add (shl x, c0), c1), y) ->
15975// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
15977 const RISCVSubtarget &Subtarget) {
15978 // Perform this optimization only in the zba extension.
15979 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15980 return SDValue();
15981
15982 // Skip for vector types and larger types.
15983 EVT VT = N->getValueType(0);
15984 if (VT != Subtarget.getXLenVT())
15985 return SDValue();
15986
15987 SDValue AddI = N->getOperand(0);
15988 SDValue Other = N->getOperand(1);
15989 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15990 return V;
15991 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15992 return V;
15993 return SDValue();
15994}
15995
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))  [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                   SelectionDAG &DAG, bool AllOnes,
                                   const RISCVSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  if (!Subtarget.hasConditionalMoveFusion()) {
    // (select cond, x, (and x, c)) has custom lowering with Zicond.
    if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
      return SDValue();

    // Maybe harmful when condition code has multiple use.
    if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
      return SDValue();

    // Maybe harmful when VT is wider than XLen.
    if (VT.getSizeInBits() > Subtarget.getXLen())
      return SDValue();
  }

  if ((Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  // The identity constant we look for: -1 for AND, 0 for OR/XOR/ADD/SUB.
  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  bool SwapSelectOps;
  // RISCVISD::SELECT_CC carries (lhs, rhs, cc) before the two values.
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}
16069
16070// Attempt combineSelectAndUse on each operand of a commutative operator N.
16072 bool AllOnes,
16073 const RISCVSubtarget &Subtarget) {
16074 SDValue N0 = N->getOperand(0);
16075 SDValue N1 = N->getOperand(1);
16076 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
16077 return Result;
16078 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
16079 return Result;
16080 return SDValue();
16081}
16082
// Transform (add (mul x, c0), c1) ->
//           (add (mul (add x, c1/c0), c0), c1%c0).
// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
// that should be excluded is when c0*(c1/c0) is simm12, which will lead
// to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (mul (add x, c1/c0), c0).
// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();
  // The first operand node must be a MUL and has no other use.
  SDValue N0 = N->getOperand(0);
  if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
    return SDValue();
  // Check if c0 and c1 match above conditions.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  // If N0C has multiple uses it's possible one of the cases in
  // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
  // in an infinite loop.
  if (!N0C->hasOneUse())
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  int64_t CA, CB;
  // c0 in {-1, 0, 1} is never profitable, and if c1 already fits simm12 the
  // original ADDI is fine as-is.
  if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
    return SDValue();
  // Search for proper CA (non-zero) and CB that both are simm12.
  if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
      !isInt<12>(C0 * (C1 / C0))) {
    CA = C1 / C0;
    CB = C1 % C0;
  } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
             isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
    CA = C1 / C0 + 1;
    CB = C1 % C0 - C0;
  } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
             isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
    CA = C1 / C0 - 1;
    CB = C1 % C0 + C0;
  } else
    return SDValue();
  // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
  SDLoc DL(N);
  SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
                             DAG.getSignedConstant(CA, DL, VT));
  SDValue New1 =
      DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
  return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
}
16149
// add (zext, zext) -> zext (add (zext, zext))
// sub (zext, zext) -> sext (sub (zext, zext))
// mul (zext, zext) -> zext (mul (zext, zext))
// sdiv (zext, zext) -> zext (sdiv (zext, zext))
// udiv (zext, zext) -> zext (udiv (zext, zext))
// srem (zext, zext) -> zext (srem (zext, zext))
// urem (zext, zext) -> zext (urem (zext, zext))
//
// where the sum of the extend widths match, and the range of the bin op
// fits inside the width of the narrower bin op. (For profitability on rvv, we
// use a power of two for both inner and outer extend.)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {

  EVT VT = N->getValueType(0);
  if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  // Both operands must be single-use zero extends.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();
  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // The two sources must share a legal type at least 8 bits wide and
  // narrower than half the result element width, so a half-width
  // intermediate op can hold the exact result.
  SDValue Src0 = N0.getOperand(0);
  SDValue Src1 = N1.getOperand(0);
  EVT SrcVT = Src0.getValueType();
  if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
      SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
      SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
    return SDValue();

  // Perform the operation at half the destination element width.
  LLVMContext &C = *DAG.getContext();
  EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
  EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());

  Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
  Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);

  // Src0 and Src1 are zero extended, so they're always positive if signed.
  //
  // sub can produce a negative from two positive operands, so it needs sign
  // extended. Other nodes produce a positive from two positive operands, so
  // zero extend instead.
  unsigned OuterExtend =
      N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

  return DAG.getNode(
      OuterExtend, SDLoc(N), VT,
      DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
}
16201
16202// Try to turn (add (xor bool, 1) -1) into (neg bool).
16204 SDValue N0 = N->getOperand(0);
16205 SDValue N1 = N->getOperand(1);
16206 EVT VT = N->getValueType(0);
16207 SDLoc DL(N);
16208
16209 // RHS should be -1.
16210 if (!isAllOnesConstant(N1))
16211 return SDValue();
16212
16213 // Look for (xor X, 1).
16214 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
16215 return SDValue();
16216
16217 // First xor input should be 0 or 1.
16219 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
16220 return SDValue();
16221
16222 // Emit a negate of the setcc.
16223 return DAG.getNegative(N0.getOperand(0), DL, VT);
16224}
16225
// DAG combine entry point for ISD::ADD.
static SDValue performADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  if (SDValue V = combineAddOfBooleanXor(N, DAG))
    return V;
  if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
    return V;
  // The shl/add reassociations are only attempted once legalization has
  // started, and never on behalf of the legalizer itself.
  if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
    if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
      return V;
    if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
      return V;
  }
  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfZExt(N, DAG))
    return V;

  // fold (add (select lhs, rhs, cc, 0, y), x) ->
  //      (select lhs, rhs, cc, x, (add x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
16251
16252// Try to turn a sub boolean RHS and constant LHS into an addi.
16254 SDValue N0 = N->getOperand(0);
16255 SDValue N1 = N->getOperand(1);
16256 EVT VT = N->getValueType(0);
16257 SDLoc DL(N);
16258
16259 // Require a constant LHS.
16260 auto *N0C = dyn_cast<ConstantSDNode>(N0);
16261 if (!N0C)
16262 return SDValue();
16263
16264 // All our optimizations involve subtracting 1 from the immediate and forming
16265 // an ADDI. Make sure the new immediate is valid for an ADDI.
16266 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
16267 if (!ImmValMinus1.isSignedIntN(12))
16268 return SDValue();
16269
16270 SDValue NewLHS;
16271 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
16272 // (sub constant, (setcc x, y, eq/neq)) ->
16273 // (add (setcc x, y, neq/eq), constant - 1)
16274 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
16275 EVT SetCCOpVT = N1.getOperand(0).getValueType();
16276 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
16277 return SDValue();
16278 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16279 NewLHS =
16280 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
16281 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
16282 N1.getOperand(0).getOpcode() == ISD::SETCC) {
16283 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
16284 // Since setcc returns a bool the xor is equivalent to 1-setcc.
16285 NewLHS = N1.getOperand(0);
16286 } else
16287 return SDValue();
16288
16289 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
16290 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
16291}
16292
// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
// valid with Y=3, while 0b0000_1000_0000_0100 is not.
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  // orc.b is a Zbb instruction.
  if (!Subtarget.hasStdExtZbb())
    return SDValue();

  EVT VT = N->getValueType(0);

  if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // The minuend must be a left shift by constant.
  if (N0->getOpcode() != ISD::SHL)
    return SDValue();

  auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!ShAmtCLeft)
    return SDValue();
  // Recover Y from the left-shift amount 8-Y; it must lie in [0, 8).
  unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();

  if (ShiftedAmount >= 8)
    return SDValue();

  SDValue LeftShiftOperand = N0->getOperand(0);
  SDValue RightShiftOperand = N1;

  if (ShiftedAmount != 0) { // Right operand must be a right shift.
    if (N1->getOpcode() != ISD::SRL)
      return SDValue();
    auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
      return SDValue();
    RightShiftOperand = N1.getOperand(0);
  }

  // At least one shift should have a single use.
  if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
    return SDValue();

  // Both shifts must operate on the same value X.
  if (LeftShiftOperand != RightShiftOperand)
    return SDValue();

  // Build the mask with only the Y-th bit of every byte set.
  APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
  Mask <<= ShiftedAmount;
  // Check that X has indeed the right shape (only the Y-th bit can be set in
  // every byte).
  if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
    return SDValue();

  return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
}
16349
// DAG combine entry point for ISD::SUB.
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineSubOfBoolean(N, DAG))
    return V;

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
  if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
      isNullConstant(N1.getOperand(1)) &&
      N1.getValueType() == N1.getOperand(0).getValueType()) {
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
    if (CCVal == ISD::SETLT) {
      SDLoc DL(N);
      // An arithmetic shift by width-1 broadcasts the sign bit, which equals
      // -(x < 0).
      unsigned ShAmt = N0.getValueSizeInBits() - 1;
      return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
                         DAG.getConstant(ShAmt, DL, VT));
    }
  }

  if (SDValue V = combineBinOpOfZExt(N, DAG))
    return V;
  if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
    return V;

  // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
  //      (select lhs, rhs, cc, x, (sub x, y))
  return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
}
16380
// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
// Legalizing setcc can introduce xors like this. Doing this transform reduces
// the number of xors and may allow the xor to fold into a branch condition.
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool IsAnd = N->getOpcode() == ISD::AND;

  // Both operands must be single-use xors.
  if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SDValue N01 = N0.getOperand(1);
  SDValue N11 = N1.getOperand(1);

  // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
  // (xor X, -1) based on the upper bits of the other operand being 0. If the
  // operation is And, allow one of the Xors to use -1.
  if (isOneConstant(N01)) {
    if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
      return SDValue();
  } else if (isOneConstant(N11)) {
    // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
    if (!(IsAnd && isAllOnesConstant(N01)))
      return SDValue();
  } else
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);

  // The LHS of the xors needs to be 0/1: every bit above bit 0 known zero.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
    return SDValue();

  // Invert the opcode and insert a new xor:
  //   (and (xor X,1), (xor Y,1)) -> (xor (or X, Y), 1) and vice versa.
  SDLoc DL(N);
  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
  return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
}
16427
16428// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
16429// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
16430// value to an unsigned value. This will be lowered to vmax and series of
16431// vnclipu instructions later. This can be extended to other truncated types
16432// other than i8 by replacing 256 and 255 with the equivalent constants for the
16433// type.
// NOTE(review): the signature line (16434) was lost in extraction; this is
// called below as combineTruncSelectToSMaxUSat(N, DAG) — confirm parameters.
16435 EVT VT = N->getValueType(0);
16436 SDValue N0 = N->getOperand(0);
16437 EVT SrcVT = N0.getValueType();
16438
16439 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16440 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
16441 return SDValue();
16442
// The truncate input must be a single-use vselect so it dies after the fold.
16443 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
16444 return SDValue();
16445
16446 SDValue Cond = N0.getOperand(0);
16447 SDValue True = N0.getOperand(1);
16448 SDValue False = N0.getOperand(2);
16449
16450 if (Cond.getOpcode() != ISD::SETCC)
16451 return SDValue();
16452
16453 // FIXME: Support the version of this pattern with the select operands
16454 // swapped.
16455 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16456 if (CCVal != ISD::SETULT)
16457 return SDValue();
16458
16459 SDValue CondLHS = Cond.getOperand(0);
16460 SDValue CondRHS = Cond.getOperand(1);
16461
// The compared value must be the same value selected when in range.
16462 if (CondLHS != True)
16463 return SDValue();
16464
16465 unsigned ScalarBits = VT.getScalarSizeInBits();
16466
16467 // FIXME: Support other constants.
// The setult bound must be exactly 2^ScalarBits (e.g. 256 for i8).
16468 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
16469 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
16470 return SDValue();
16471
16472 if (False.getOpcode() != ISD::SIGN_EXTEND)
16473 return SDValue();
16474
16475 False = False.getOperand(0);
16476
// The out-of-range value must be (sext (setgt X, 0)) on the same X.
16477 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
16478 return SDValue();
16479
16480 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
16481 if (!FalseRHSC || !FalseRHSC->isZero())
16482 return SDValue();
16483
16484 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
16485 if (CCVal2 != ISD::SETGT)
16486 return SDValue();
16487
16488 // Emit the signed to unsigned saturation pattern.
16489 SDLoc DL(N);
16490 SDValue Max =
16491 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
16492 SDValue Min =
16493 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
16494 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
16495 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
16496}
16497
16498// Handle P extension truncate patterns:
16499// PASUB/PASUBU: (trunc (srl (sub ([s|z]ext a), ([s|z]ext b)), 1))
16500// PMULHSU: (trunc (srl (mul (sext a), (zext b)), EltBits))
16501// PMULHR*: (trunc (srl (add (mul (sext a), (zext b)), round_const), EltBits))
// NOTE(review): the first signature line (16502) was lost in extraction;
// called below as combinePExtTruncate(N, DAG, Subtarget) — confirm.
16503 const RISCVSubtarget &Subtarget) {
16504 SDValue N0 = N->getOperand(0);
16505 EVT VT = N->getValueType(0);
16506 if (N0.getOpcode() != ISD::SRL)
16507 return SDValue();
16508
// Only the fixed-length vector types the P-extension SIMD path supports.
16509 MVT VecVT = VT.getSimpleVT();
16510 if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
16511 VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
16512 return SDValue();
16513
16514 // Check if shift amount is a splat constant
16515 SDValue ShAmt = N0.getOperand(1);
16516 if (ShAmt.getOpcode() != ISD::BUILD_VECTOR)
16517 return SDValue();
16518
// NOTE(review): the declaration of BV (line 16519) was lost in extraction;
// presumably a dyn_cast<BuildVectorSDNode> of ShAmt — verify.
16520 if (!BV)
16521 return SDValue();
16522 SDValue Splat = BV->getSplatValue();
16523 if (!Splat)
16524 return SDValue();
// NOTE(review): the declaration of C (line 16525) was lost in extraction;
// presumably a dyn_cast<ConstantSDNode> of Splat — verify.
16526 if (!C)
16527 return SDValue();
16528
16529 SDValue Op = N0.getOperand(0);
16530 unsigned ShAmtVal = C->getZExtValue();
16531 unsigned EltBits = VecVT.getScalarSizeInBits();
16532
16533 // Check for rounding pattern: (add (mul ...), round_const)
16534 bool IsRounding = false;
16535 if (Op.getOpcode() == ISD::ADD && (EltBits == 16 || EltBits == 32)) {
16536 SDValue AddRHS = Op.getOperand(1);
16537 if (auto *RndBV = dyn_cast<BuildVectorSDNode>(AddRHS.getNode())) {
16538 if (auto *RndC =
16539 dyn_cast_or_null<ConstantSDNode>(RndBV->getSplatValue())) {
// Rounding constant is half of the final shift: 1 << (EltBits - 1).
16540 uint64_t ExpectedRnd = 1ULL << (EltBits - 1);
16541 if (RndC->getZExtValue() == ExpectedRnd &&
16542 Op.getOperand(0).getOpcode() == ISD::MUL) {
16543 Op = Op.getOperand(0);
16544 IsRounding = true;
16545 }
16546 }
16547 }
16548 }
16549
16550 SDValue LHS = Op.getOperand(0);
16551 SDValue RHS = Op.getOperand(1);
16552
// Classify the extension kind of each multiplicand/subtrahend.
16553 bool LHSIsSExt = LHS.getOpcode() == ISD::SIGN_EXTEND;
16554 bool LHSIsZExt = LHS.getOpcode() == ISD::ZERO_EXTEND;
16555 bool RHSIsSExt = RHS.getOpcode() == ISD::SIGN_EXTEND;
16556 bool RHSIsZExt = RHS.getOpcode() == ISD::ZERO_EXTEND;
16557
16558 if (!(LHSIsSExt || LHSIsZExt) || !(RHSIsSExt || RHSIsZExt))
16559 return SDValue();
16560
16561 SDValue A = LHS.getOperand(0);
16562 SDValue B = RHS.getOperand(0);
16563
// The pre-extension operands must already be in the result type.
16564 if (A.getValueType() != VT || B.getValueType() != VT)
16565 return SDValue();
16566
16567 unsigned Opc;
16568 switch (Op.getOpcode()) {
16569 default:
16570 return SDValue();
16571 case ISD::SUB:
16572 // PASUB/PASUBU: shift amount must be 1
16573 if (ShAmtVal != 1)
16574 return SDValue();
16575 if (LHSIsSExt && RHSIsSExt)
16576 Opc = RISCVISD::PASUB;
16577 else if (LHSIsZExt && RHSIsZExt)
16578 Opc = RISCVISD::PASUBU;
16579 else
16580 return SDValue();
16581 break;
16582 case ISD::MUL:
16583 // PMULH*/PMULHR*: shift amount must be element size, only for i16/i32
16584 if (ShAmtVal != EltBits || (EltBits != 16 && EltBits != 32))
16585 return SDValue();
16586 if (IsRounding) {
16587 if (LHSIsSExt && RHSIsSExt) {
16588 Opc = RISCVISD::PMULHR;
16589 } else if (LHSIsZExt && RHSIsZExt) {
16590 Opc = RISCVISD::PMULHRU;
16591 } else if ((LHSIsSExt && RHSIsZExt) || (LHSIsZExt && RHSIsSExt)) {
16592 Opc = RISCVISD::PMULHRSU;
16593 // commuted case
16594 if (LHSIsZExt && RHSIsSExt)
16595 std::swap(A, B);
16596 } else {
16597 return SDValue();
16598 }
16599 } else {
16600 if ((LHSIsSExt && RHSIsZExt) || (LHSIsZExt && RHSIsSExt)) {
16601 Opc = RISCVISD::PMULHSU;
16602 // commuted case
16603 if (LHSIsZExt && RHSIsSExt)
16604 std::swap(A, B);
16605 } else
16606 return SDValue();
16607 }
16608 break;
16609 }
16610
16611 return DAG.getNode(Opc, SDLoc(N), VT, {A, B});
16612}
16613
// DAG combine for ISD::TRUNCATE.
// NOTE(review): the first signature line (16614) was lost in extraction;
// presumably static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
16615 const RISCVSubtarget &Subtarget) {
16616 SDValue N0 = N->getOperand(0);
16617 EVT VT = N->getValueType(0);
16618
// P-extension SIMD truncate patterns take priority for fixed vectors.
16619 if (VT.isFixedLengthVector() && Subtarget.enablePExtSIMDCodeGen())
16620 return combinePExtTruncate(N, DAG, Subtarget);
16621
16622 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
16623 // extending X. This is safe since we only need the LSB after the shift and
16624 // shift amounts larger than 31 would produce poison. If we wait until
16625 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16626 // to use a BEXT instruction.
16627 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
16628 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
16629 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16630 SDLoc DL(N0);
16631 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16632 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16633 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16634 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
16635 }
16636
16637 return combineTruncSelectToSMaxUSat(N, DAG);
16638}
16639
16640// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
16641// truncation. But RVV doesn't have truncation instructions for more than twice
16642// the bitwidth.
16643//
16644// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
16645//
16646// vsetvli a0, zero, e32, m2, ta, ma
16647// vnsrl.wi v12, v8, 0
16648// vsetvli zero, zero, e16, m1, ta, ma
16649// vnsrl.wi v8, v12, 0
16650// vsetvli zero, zero, e8, mf2, ta, ma
16651// vnsrl.wi v8, v8, 0
16652//
16653// So reverse the combine so we generate an vmseq/vmsne again:
16654//
16655// and (lshr (trunc X), ShAmt), 1
16656// -->
16657// zext (icmp ne (and X, (1 << ShAmt)), 0)
16658//
16659// and (lshr (not (trunc X)), ShAmt), 1
16660// -->
16661// zext (icmp eq (and X, (1 << ShAmt)), 0)
// NOTE(review): the first signature line (16662) was lost in extraction;
// called above as reverseZExtICmpCombine(N, DAG, Subtarget) — confirm.
16663 const RISCVSubtarget &Subtarget) {
16664 using namespace SDPatternMatch;
16665 SDLoc DL(N);
16666
16667 if (!Subtarget.hasVInstructions())
16668 return SDValue();
16669
16670 EVT VT = N->getValueType(0);
16671 if (!VT.isVector())
16672 return SDValue();
16673
// Match (and (srl Inner, ShAmt), 1); the srl must be single-use so it dies.
16674 APInt ShAmt;
16675 SDValue Inner;
16676 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16677 m_One())))
16678 return SDValue();
16679
// Inner is either (not (trunc X)) or (trunc X); IsNot selects eq vs ne below.
16680 SDValue X;
16681 bool IsNot;
16682 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16683 IsNot = true;
16684 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16685 IsNot = false;
16686 else
16687 return SDValue();
16688
// Only worthwhile when truncating by more than half the source width,
// since a single vnsrl can handle a 2x narrowing.
16689 EVT WideVT = X.getValueType();
16690 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16691 return SDValue();
16692
16693 SDValue Res =
16694 DAG.getNode(ISD::AND, DL, WideVT, X,
16695 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16696 Res = DAG.getSetCC(DL,
16697 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16698 WideVT.getVectorElementCount()),
16699 Res, DAG.getConstant(0, DL, WideVT),
16700 IsNot ? ISD::SETEQ : ISD::SETNE);
16701 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16702}
16703
16704// (and (i1) f, (setcc c, 0, ne)) -> (czero.nez f, c)
16705// (and (i1) f, (setcc c, 0, eq)) -> (czero.eqz f, c)
16706// (and (setcc c, 0, ne), (i1) g) -> (czero.nez g, c)
16707// (and (setcc c, 0, eq), (i1) g) -> (czero.eqz g, c)
// NOTE(review): the first signature line (16708) was lost in extraction;
// called above as combineANDOfSETCCToCZERO(N, DAG, Subtarget) — confirm.
16709 const RISCVSubtarget &Subtarget) {
16710 if (!Subtarget.hasCZEROLike())
16711 return SDValue();
16712
16713 SDValue N0 = N->getOperand(0);
16714 SDValue N1 = N->getOperand(1);
16715
// True if V is (setcc X, 0, eq/ne) — a comparison against zero.
16716 auto IsEqualCompZero = [](SDValue &V) -> bool {
16717 if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
16718 ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
// NOTE(review): line 16719 was lost in extraction; presumably a check that
// CC is ISD::SETEQ or ISD::SETNE — verify against upstream.
16720 return true;
16721 }
16722 return false;
16723 };
16724
// Canonicalize so N0 is the single-use setcc-against-zero, if either side is.
16725 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16726 std::swap(N0, N1);
16727 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16728 return SDValue();
16729
// The other operand must be known 0/1 for the czero result to equal the and.
16730 KnownBits Known = DAG.computeKnownBits(N1);
16731 if (Known.getMaxValue().ugt(1))
16732 return SDValue();
16733
16734 unsigned CzeroOpcode =
16735 (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
16736 ? RISCVISD::CZERO_EQZ
16737 : RISCVISD::CZERO_NEZ;
16738
16739 EVT VT = N->getValueType(0);
16740 SDLoc DL(N);
16741 return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
16742}
16743
// Fold (and (atomic_load X), mask-of-loaded-width) into a zero-extending
// atomic load, removing the mask.
// NOTE(review): the signature lines (16744-16745) were lost in extraction;
// called below as reduceANDOfAtomicLoad(N, DCI) — confirm parameters.
16746 SelectionDAG &DAG = DCI.DAG;
16747 if (N->getOpcode() != ISD::AND)
16748 return SDValue();
16749
16750 SDValue N0 = N->getOperand(0);
16751 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16752 return SDValue();
16753 if (!N0.hasOneUse())
16754 return SDValue();
16755
// NOTE(review): lines 16756-16757 were lost in extraction; presumably the
// declaration of ALoad (cast of N0 to an atomic load node) plus a check that
// the load is not already extended — verify against upstream.
16758 return SDValue();
16759
16760 EVT LoadedVT = ALoad->getMemoryVT();
16761 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16762 if (!MaskConst)
16763 return SDValue();
// The AND mask must be exactly the low bits covering the loaded width.
16764 uint64_t Mask = MaskConst->getZExtValue();
16765 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16766 if (Mask != ExpectedMask)
16767 return SDValue();
16768
16769 SDValue ZextLoad = DAG.getAtomicLoad(
16770 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16771 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
// Replace both the value result and the chain result of the old load.
16772 DCI.CombineTo(N, ZextLoad);
16773 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
// NOTE(review): line 16774 was lost in extraction; presumably cleanup of the
// now-dead original load node — verify against upstream.
16775 return SDValue(N, 0);
16776}
16777
16778// Sometimes a mask is applied after a shift. If that shift was fed by a
16779// load, there is sometimes the opportunity to narrow the load, which is
16780// hidden by the intermediate shift. Detect that case and commute the
16781// shift/and in order to enable load narrowing.
// NOTE(review): the signature line (16782) was lost in extraction, so the
// helper's name and exact parameters are unknown from this view — confirm.
16783 EVT VT = N->getValueType(0);
16784 if (!VT.isScalarInteger())
16785 return SDValue();
16786
16787 using namespace SDPatternMatch;
16788 SDValue LoadNode;
16789 APInt MaskVal, ShiftVal;
16790 // (and (shl (load ...), ShiftAmt), Mask)
16791 if (!sd_match(
// NOTE(review): line 16792 (the start of the match pattern binding LoadNode
// to a shifted load) was lost in extraction — verify against upstream.
16793 m_ConstInt(ShiftVal))),
16794 m_ConstInt(MaskVal)))) {
16795 return SDValue();
16796 }
16797
16798 uint64_t ShiftAmt = ShiftVal.getZExtValue();
16799
// An over-wide shift would make the lshr below meaningless.
16800 if (ShiftAmt >= VT.getSizeInBits())
16801 return SDValue();
16802
16803 // Calculate the appropriate mask if it were applied before the shift.
16804 APInt InnerMask = MaskVal.lshr(ShiftAmt);
// Only byte/half/word masks correspond to a narrower load width.
16805 bool IsNarrowable =
16806 InnerMask == 0xff || InnerMask == 0xffff || InnerMask == 0xffffffff;
16807
16808 if (!IsNarrowable)
16809 return SDValue();
16810
16811 // AND the loaded value and change the shift appropriately, allowing
16812 // the load to be narrowed.
16813 SDLoc DL(N);
16814 SDValue InnerAnd = DAG.getNode(ISD::AND, DL, VT, LoadNode,
16815 DAG.getConstant(InnerMask, DL, VT));
16816 return DAG.getNode(ISD::SHL, DL, VT, InnerAnd,
16817 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
16818}
16819
16820// Combines two comparison operation and logic operation to one selection
16821// operation(min, max) and logic operation. Returns new constructed Node if
16822// conditions for optimization are satisfied.
// NOTE(review): the signature lines (16823-16824) were lost in extraction;
// presumably performANDCombine(SDNode *N, DAGCombinerInfo &DCI,
16825 const RISCVSubtarget &Subtarget) {
16826 SelectionDAG &DAG = DCI.DAG;
16827 SDValue N0 = N->getOperand(0);
16828
16829 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16830 // extending X. This is safe since we only need the LSB after the shift and
16831 // shift amounts larger than 31 would produce poison. If we wait until
16832 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16833 // to use a BEXT instruction.
16834 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16835 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16836 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16837 N0.hasOneUse()) {
16838 SDLoc DL(N);
16839 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16840 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16841 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16842 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16843 DAG.getConstant(1, DL, MVT::i64));
16844 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16845 }
16846
// NOTE(review): line 16847 was lost in extraction; presumably another
// `if (SDValue V = <combine>(N, DAG...))` guarding the return below — verify.
16848 return V;
16849 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16850 return V;
// CZERO and DeMorgan folds only apply once the DAG is fully legalized.
16851 if (DCI.isAfterLegalizeDAG())
16852 if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
16853 return V;
16854 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16855 return V;
16856 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16857 return V;
16858 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16859 return V;
16860
16861 if (DCI.isAfterLegalizeDAG())
16862 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16863 return V;
16864
16865 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16866 // (select lhs, rhs, cc, x, (and x, y))
16867 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16868}
16869
16870// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16871// FIXME: Generalize to other binary operators with same operand.
// NOTE(review): the first signature line (16872) was lost in extraction;
// called below as combineOrOfCZERO(N, N0, N1, DAG) — confirm parameters.
16873 SelectionDAG &DAG) {
16874 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16875
// The or must combine a czero_eqz with a czero_nez, both single-use.
16876 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16877 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16878 !N0.hasOneUse() || !N1.hasOneUse())
16879 return SDValue();
16880
16881 // Should have the same condition.
16882 SDValue Cond = N0.getOperand(1);
16883 if (Cond != N1.getOperand(1))
16884 return SDValue();
16885
16886 SDValue TrueV = N0.getOperand(0);
16887 SDValue FalseV = N1.getOperand(0);
16888
// Both arms must be single-use (xor V, 1) with the same xor constant.
16889 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16890 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16891 !isOneConstant(TrueV.getOperand(1)) ||
16892 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16893 return SDValue();
16894
16895 EVT VT = N->getValueType(0);
16896 SDLoc DL(N);
16897
// Rebuild the select idiom on the pre-xor values and hoist a single xor
// above the or. The two czero results are disjoint by construction.
16898 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16899 Cond);
16900 SDValue NewN1 =
16901 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16902 SDValue NewOr =
16903 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16904 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16905}
16906
16907// (xor X, (xor (and X, C2), Y))
16908// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16909// where C2 is a shifted mask with width = Width and shift = ShAmt
16910// qc_insb might become qc.insb or qc.insbi depending on the operands.
// NOTE(review): the signature line (16911) was lost in extraction; called
// above as combineXorToBitfieldInsert(N, DAG, Subtarget) — confirm.
16912 const RISCVSubtarget &Subtarget) {
16913 if (!Subtarget.hasVendorXqcibm())
16914 return SDValue();
16915
16916 using namespace SDPatternMatch;
16917 SDValue Base, Inserted;
16918 APInt CMask;
16919 if (!sd_match(N, m_Xor(m_Value(Base),
// NOTE(review): line 16920 (the inner xor/and pattern matching Base against
// the mask C2) was lost in extraction — verify against upstream.
16921 m_ConstInt(CMask))),
16922 m_Value(Inserted))))))
16923 return SDValue();
16924
// QC.INSB operates on 32-bit values only.
16925 if (N->getValueType(0) != MVT::i32)
16926 return SDValue();
16927 unsigned Width, ShAmt;
16928 if (!CMask.isShiftedMask(ShAmt, Width))
16929 return SDValue();
16930
16931 // Check if all zero bits in CMask are also zero in Inserted
16932 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16933 return SDValue();
16934
16935 SDLoc DL(N);
16936
16937 // `Inserted` needs to be right shifted before it is put into the
16938 // instruction.
16939 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16940 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16941
16942 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16943 DAG.getConstant(ShAmt, DL, MVT::i32)};
16944 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16945}
16946
// Fold (or X, C1) where C1 is a shifted mask that won't fit an ORI into a
// Xqcibm bitfield insert of -1: (qc.insbi X, -1, Width, ShAmt).
// NOTE(review): the first signature line (16947) was lost in extraction;
// called below as combineOrToBitfieldInsert(N, DAG, Subtarget) — confirm.
16948 const RISCVSubtarget &Subtarget) {
16949 if (!Subtarget.hasVendorXqcibm())
16950 return SDValue();
16951
16952 using namespace SDPatternMatch;
16953
16954 SDValue X;
16955 APInt MaskImm;
16956 if (!sd_match(N, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
16957 return SDValue();
16958
// A simm12 immediate would be handled by a plain ORI; skip those.
16959 unsigned ShAmt, Width;
16960 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
16961 return SDValue();
16962
16963 if (N->getValueType(0) != MVT::i32)
16964 return SDValue();
16965
16966 // If Zbs is enabled and it is a single bit set we can use BSETI which
16967 // can be compressed to C_BSETI when Xqcibm in enabled.
16968 if (Width == 1 && Subtarget.hasStdExtZbs())
16969 return SDValue();
16970
16971 // If C1 is a shifted mask (but can't be formed as an ORI),
16972 // use a bitfield insert of -1.
16973 // Transform (or x, C1)
16974 // -> (qc.insbi x, -1, width, shift)
16975 SDLoc DL(N);
16976
16977 SDValue Ops[] = {X, DAG.getSignedConstant(-1, DL, MVT::i32),
16978 DAG.getConstant(Width, DL, MVT::i32),
16979 DAG.getConstant(ShAmt, DL, MVT::i32)};
16980 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16981}
16982
16983// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
16984// being inserted only sets known zero bits.
// NOTE(review): the first signature line (16985) was lost in extraction;
// called below as combineOrAndToBitfieldInsert(N, DAG, Subtarget) — confirm.
16986 const RISCVSubtarget &Subtarget) {
16987 // Supported only in Xqcibm for now.
16988 if (!Subtarget.hasVendorXqcibm())
16989 return SDValue();
16990
16991 using namespace SDPatternMatch;
16992
16993 SDValue Inserted;
16994 APInt MaskImm, OrImm;
16995 if (!sd_match(
16996 N, m_SpecificVT(MVT::i32, m_Or(m_OneUse(m_And(m_Value(Inserted),
16997 m_ConstInt(MaskImm))),
16998 m_ConstInt(OrImm)))))
16999 return SDValue();
17000
17001 // Compute the Known Zero for the AND as this allows us to catch more general
17002 // cases than just looking for AND with imm.
17003 KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
17004
17005 // The bits being inserted must only set those bits that are known to be
17006 // zero.
17007 if (!OrImm.isSubsetOf(Known.Zero)) {
17008 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
17009 // currently handle this case.
17010 return SDValue();
17011 }
17012
17013 unsigned ShAmt, Width;
17014 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
17015 if (!Known.Zero.isShiftedMask(ShAmt, Width))
17016 return SDValue();
17017
17018 // QC_INSB(I) dst, src, #width, #shamt.
17019 SDLoc DL(N);
17020
// The inserted field is OrImm realigned down to bit 0 of the field.
17021 SDValue ImmNode =
17022 DAG.getSignedConstant(OrImm.getSExtValue() >> ShAmt, DL, MVT::i32);
17023
17024 SDValue Ops[] = {Inserted, ImmNode, DAG.getConstant(Width, DL, MVT::i32),
17025 DAG.getConstant(ShAmt, DL, MVT::i32)};
17026 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
17027}
17028
// DAG combine for ISD::OR.
// NOTE(review): the first signature line (17029) was lost in extraction;
// presumably static SDValue performORCombine(SDNode *N, DAGCombinerInfo &DCI,
17030 const RISCVSubtarget &Subtarget) {
17031 SelectionDAG &DAG = DCI.DAG;
17032
17033 if (SDValue V = combineOrToBitfieldInsert(N, DAG, Subtarget))
17034 return V;
17035 if (SDValue V = combineOrAndToBitfieldInsert(N, DAG, Subtarget))
17036 return V;
17037 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17038 return V;
17039 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17040 return V;
17041
// DeMorgan fold only applies once the DAG is fully legalized.
17042 if (DCI.isAfterLegalizeDAG())
17043 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
17044 return V;
17045
17046 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
17047 // We may be able to pull a common operation out of the true and false value.
17048 SDValue N0 = N->getOperand(0);
17049 SDValue N1 = N->getOperand(1);
// Try both operand orders since or is commutative.
17050 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
17051 return V;
17052 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
17053 return V;
17054
17055 // fold (or (select cond, 0, y), x) ->
17056 // (select cond, x, (or x, y))
17057 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
17058}
17059
// DAG combine for ISD::XOR.
// NOTE(review): the first signature line (17060) was lost in extraction;
// presumably static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
17061 const RISCVSubtarget &Subtarget) {
17062 SDValue N0 = N->getOperand(0);
17063 SDValue N1 = N->getOperand(1);
17064
17065 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
17066 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
17067 // RISCVISD:::SLLW and we can't recover it to use a BSET instruction.
17068 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
17069 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
17070 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
17071 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
17072 SDLoc DL(N);
17073 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
17074 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
17075 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
17076 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
17077 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
17078 }
17079
17080 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
17081 // NOTE: Assumes ROL being legal means ROLW is legal.
17082 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17083 if (N0.getOpcode() == RISCVISD::SLLW &&
// NOTE(review): line 17084 was lost in extraction; presumably checks that the
// SLLW shifts a constant 1 and that N1 is all-ones — verify against upstream.
17085 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
17086 SDLoc DL(N);
17087 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
17088 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
17089 }
17090
17091 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
17092 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
17093 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
// NOTE(review): line 17094 was lost in extraction; presumably the declaration
// of CC from the setcc's condition-code operand — verify against upstream.
17095 if (ConstN00 && CC == ISD::SETLT) {
17096 EVT VT = N0.getValueType();
17097 SDLoc DL(N0);
17098 const APInt &Imm = ConstN00->getAPIntValue();
// Only rewrite when the incremented constant still fits simm12.
17099 if ((Imm + 1).isSignedIntN(12))
17100 return DAG.getSetCC(DL, VT, N0.getOperand(1),
17101 DAG.getConstant(Imm + 1, DL, VT), CC);
17102 }
17103 }
17104
17105 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
17106 return V;
17107
17108 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17109 return V;
17110 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17111 return V;
17112
17113 // fold (xor (select cond, 0, y), x) ->
17114 // (select cond, x, (xor x, y))
17115 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
17116}
17117
17118// Try to expand a multiply to a sequence of shifts and add/subs,
17119// for a machine without native mul instruction.
// NOTE(review): the first signature line (17120) was lost in extraction;
// called below as expandMulToNAFSequence(N, DAG, MulAmt) — confirm.
17121 uint64_t MulAmt) {
17122 SDLoc DL(N);
17123 EVT VT = N->getValueType(0);
// NOTE(review): line 17124 was lost in extraction; presumably the declaration
// of BitWidth from VT's size — verify against upstream.
17125
17126 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
17127 SDValue N0 = N->getOperand(0);
17128
17129 // Find the Non-adjacent form of the multiplier.
// NAF digit rule: for each set low bit, emit +1 when the low two bits are 01
// and -1 when they are 11, then subtract the digit out of E and keep shifting.
17130 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
17131 if (E & 1) {
17132 bool IsAdd = (E & 3) == 1;
17133 E -= IsAdd ? 1 : -1;
17134 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
17135 DAG.getShiftAmountConstant(I, VT, DL));
17136 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
17137 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
17138 }
17139 }
17140
17141 return Result;
17142}
17143
17144// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
// NOTE(review): the first signature line (17145) was lost in extraction;
// called below as expandMulToAddOrSubOfShl(N, DAG, MulAmt) — confirm.
17146 uint64_t MulAmt) {
// Isolate the lowest set bit of the multiplier (2^M).
17147 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
17148 SDValue X = N->getOperand(0);
// NOTE(review): line 17149 was lost in extraction; presumably the declaration
// of Op (the ISD add/sub opcode selected below) — verify against upstream.
17150 uint64_t ShiftAmt1;
17151 bool CanSub = isPowerOf2_64(MulAmt + MulAmtLowBit);
17152 auto PreferSub = [X, MulAmtLowBit]() {
17153 // For MulAmt == 3 << M both (X << M + 2) - (X << M)
17154 // and (X << M + 1) + (X << M) are valid expansions.
17155 // Prefer SUB if we can get (X << M + 2) for free,
17156 // because X is exact (Y >> M + 2).
17157 uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2;
17158 using namespace SDPatternMatch;
17159 return sd_match(X, m_ExactSr(m_Value(), m_SpecificInt(ShAmt)));
17160 };
17161 if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) {
17162 Op = ISD::ADD;
17163 ShiftAmt1 = MulAmt - MulAmtLowBit;
17164 } else if (CanSub) {
17165 Op = ISD::SUB;
17166 ShiftAmt1 = MulAmt + MulAmtLowBit;
17167 } else {
17168 return SDValue();
17169 }
17170 EVT VT = N->getValueType(0);
17171 SDLoc DL(N);
17172 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X,
17173 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
17174 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, X,
17175 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
17176 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
17177}
17178
17179static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
17180 unsigned ShY, bool AddX, unsigned Shift) {
17181 SDLoc DL(N);
17182 EVT VT = N->getValueType(0);
17183 SDValue X = N->getOperand(0);
17184 // Put the shift first if we can fold:
17185 // a. a zext into the shift forming a slli.uw
17186 // b. an exact shift right forming one shorter shift or no shift at all
17187 using namespace SDPatternMatch;
17188 if (Shift != 0 &&
17189 sd_match(X, m_AnyOf(m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))),
17190 m_ExactSr(m_Value(), m_ConstInt())))) {
17191 X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
17192 Shift = 0;
17193 }
17194 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17195 DAG.getTargetConstant(ShY, DL, VT), X);
17196 if (ShX != 0)
17197 ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, ShlAdd,
17198 DAG.getTargetConstant(ShX, DL, VT), AddX ? X : ShlAdd);
17199 if (Shift == 0)
17200 return ShlAdd;
17201 // Otherwise, put the shl last so that it can fold with following instructions
17202 // (e.g. sext or add).
17203 return DAG.getNode(ISD::SHL, DL, VT, ShlAdd, DAG.getConstant(Shift, DL, VT));
17204}
17205
// Expand a multiply by a 3/5/9-composed constant into shNadd chains.
// NOTE(review): the first signature line (17206) was lost in extraction;
// called below as expandMulToShlAddShlAdd(N, DAG, MulAmt, Shift) — confirm.
17207 uint64_t MulAmt, unsigned Shift) {
17208 switch (MulAmt) {
17209 // 3/5/9 -> (shYadd X, X)
17210 case 3:
17211 return getShlAddShlAdd(N, DAG, 0, 1, /*AddX=*/false, Shift);
17212 case 5:
17213 return getShlAddShlAdd(N, DAG, 0, 2, /*AddX=*/false, Shift);
17214 case 9:
17215 return getShlAddShlAdd(N, DAG, 0, 3, /*AddX=*/false, Shift);
17216 // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
17217 case 5 * 3:
17218 return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false, Shift);
17219 case 9 * 3:
17220 return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false, Shift);
17221 case 5 * 5:
17222 return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false, Shift);
17223 case 9 * 5:
17224 return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false, Shift);
17225 case 9 * 9:
17226 return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false, Shift);
17227 default:
17228 break;
17229 }
17230
// Handle MulAmt = 2^ShX * (3/5/9) + 1 forms.
17231 int ShX;
17232 if (int ShY = isShifted359(MulAmt - 1, ShX)) {
17233 assert(ShX != 0 && "MulAmt=4,6,10 handled before");
17234 // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
17235 if (ShX <= 3)
17236 return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
17237 // 2^N * 3/5/9 + 1 -> (add (shYadd (shl X, N), (shl X, N)), X)
17238 if (Shift == 0) {
17239 SDLoc DL(N);
17240 EVT VT = N->getValueType(0);
17241 SDValue X = N->getOperand(0);
17242 SDValue Shl =
17243 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShX, DL, VT));
17244 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
17245 DAG.getTargetConstant(ShY, DL, VT), Shl);
17246 return DAG.getNode(ISD::ADD, DL, VT, ShlAdd, X);
17247 }
17248 }
17249 return SDValue();
17250}
17251
17252// Try to expand a scalar multiply to a faster sequence.
// NOTE(review): the signature lines (17253-17254) were lost in extraction;
// presumably expandMul(SDNode *N, SelectionDAG &DAG, DAGCombinerInfo &DCI,
17255 const RISCVSubtarget &Subtarget) {
17256
17257 EVT VT = N->getValueType(0);
17258
17259 // LI + MUL is usually smaller than the alternative sequence.
// NOTE(review): line 17260 was lost in extraction; presumably the minsize
// check guarding this early return — verify against upstream.
17261 return SDValue();
17262
17263 if (VT != Subtarget.getXLenVT())
17264 return SDValue();
17265
// Before legalization with Zmmul available, let other folds see the mul.
17266 bool ShouldExpandMul =
17267 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
17268 !Subtarget.hasStdExtZmmul();
17269 if (!ShouldExpandMul)
17270 return SDValue();
17271
17272 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17273 if (!CNode)
17274 return SDValue();
17275 uint64_t MulAmt = CNode->getZExtValue();
17276
17277 // Don't do this if the Xqciac extension is enabled and the MulAmt in simm12.
17278 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
17279 return SDValue();
17280
17281 // WARNING: The code below is knowingly incorrect with regards to undef
17282 // semantics. We're adding additional uses of X here, and in principle, we
17283 // should be freezing X before doing so. However, adding freeze here causes
17284 // real regressions, and no other target properly freezes X in these cases
17285 // either.
17286 if (Subtarget.hasShlAdd(3)) {
17287 // 3/5/9 * 2^N -> (shl (shXadd X, X), N)
17288 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
17289 // of 25 which happen to be quite common.
17290 // (2/4/8 * 3/5/9 + 1) * 2^N
// Strip the power-of-two factor and try the shNadd-chain expansions first.
17291 unsigned Shift = llvm::countr_zero(MulAmt);
17292 if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift, Shift))
17293 return V;
17294
17295 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
17296 // shXadd. First check if this a sum of two power of 2s because that's
17297 // easy. Then count how many zeros are up to the first bit.
17298 SDValue X = N->getOperand(0);
17299 if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
17300 unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
17301 SDLoc DL(N);
17302 SDValue Shift1 =
17303 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
17304 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17305 DAG.getTargetConstant(Shift, DL, VT), Shift1);
17306 }
17307
17308 // TODO: 2^(C1>3) * 3/5/9 - 1
17309
17310 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
17311 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
17312 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
17313 if (ScaleShift >= 1 && ScaleShift < 4) {
17314 unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2));
17315 SDLoc DL(N);
17316 SDValue Shift1 =
17317 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
17318 return DAG.getNode(
17319 ISD::ADD, DL, VT, Shift1,
17320 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17321 DAG.getTargetConstant(ScaleShift, DL, VT), X));
17322 }
17323 }
17324
17325 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
17326 for (uint64_t Offset : {3, 5, 9}) {
17327 if (isPowerOf2_64(MulAmt + Offset)) {
17328 unsigned ShAmt = llvm::countr_zero(MulAmt + Offset);
// Guard against a shift amount as wide as the type (e.g. MulAmt+Offset==2^XLEN).
17329 if (ShAmt >= VT.getSizeInBits())
17330 continue;
17331 SDLoc DL(N);
17332 SDValue Shift1 =
17333 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
17334 SDValue Mul359 =
17335 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17336 DAG.getTargetConstant(Log2_64(Offset - 1), DL, VT), X);
17337 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
17338 }
17339 }
17340 }
17341
17342 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
17343 return V;
17344
// Without Zmmul the mul will be libcall-expanded anyway; a NAF shift/add
// sequence is always preferable to that.
17345 if (!Subtarget.hasStdExtZmmul())
17346 return expandMulToNAFSequence(N, DAG, MulAmt);
17347
17348 return SDValue();
17349}
17350
// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
// (bitcast (sra (v2Xi16 (bitcast X)), 15))
// Same for other equivalent types with other equivalent constants.
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Do this for legal vectors unless they are i1 or i8 vectors.
  if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
    return SDValue();

  // Structurally match (mul (and (srl X, _), _), _); the constant operands
  // are validated below.
  if (N->getOperand(0).getOpcode() != ISD::AND ||
      N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
    return SDValue();

  SDValue And = N->getOperand(0);
  SDValue Srl = And.getOperand(0);

  // V1 = multiplier, V2 = AND mask, V3 = shift amount; all must be constant
  // splats.
  APInt V1, V2, V3;
  if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
      !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
    return SDValue();

  // The constants must relate to the half-width element type: the multiplier
  // is a low-half mask, the AND mask has bits 0 and HalfSize set, and the
  // shift amount is HalfSize - 1 (i.e. it isolates the sign bit of each
  // half-width lane).
  unsigned HalfSize = VT.getScalarSizeInBits() / 2;
  if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
      V3 != (HalfSize - 1))
    return SDValue();

  // Reinterpret as twice as many half-width lanes and arithmetic-shift each
  // lane to broadcast its sign bit.
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
                                EVT::getIntegerVT(*DAG.getContext(), HalfSize),
                                VT.getVectorElementCount() * 2);
  SDLoc DL(N);
  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
  SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
                            DAG.getConstant(HalfSize - 1, DL, HalfVT));
  return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
}
17389
                                 const RISCVSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  // Scalar multiplies by constants are handled by the expansion logic.
  if (!VT.isVector())
    return expandMul(N, DAG, DCI, Subtarget);

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue MulOper;
  unsigned AddSubOpc;

  // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
  //        (mul x, add (y, 1)) -> (add x, (mul x, y))
  // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
  //         (mul x, (sub 1, y)) -> (sub x, (mul x, y))
  // Matches a one-use (add x, 1) or (sub 1, x); on success AddSubOpc and
  // MulOper are set for the rewrites below.
  auto IsAddSubWith1 = [&](SDValue V) -> bool {
    AddSubOpc = V->getOpcode();
    if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
      SDValue Opnd = V->getOperand(1);
      MulOper = V->getOperand(0);
      if (AddSubOpc == ISD::SUB)
        std::swap(Opnd, MulOper);
      if (isOneOrOneSplat(Opnd))
        return true;
    }
    return false;
  };

  if (IsAddSubWith1(N0)) {
    SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
    return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
  }

  if (IsAddSubWith1(N1)) {
    SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
    return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
  }

  if (SDValue V = combineBinOpOfZExt(N, DAG))
    return V;

    return V;

  return SDValue();
}
17438
17439/// According to the property that indexed load/store instructions zero-extend
17440/// their indices, try to narrow the type of index operand.
17441static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
17442 if (isIndexTypeSigned(IndexType))
17443 return false;
17444
17445 if (!N->hasOneUse())
17446 return false;
17447
17448 EVT VT = N.getValueType();
17449 SDLoc DL(N);
17450
17451 // In general, what we're doing here is seeing if we can sink a truncate to
17452 // a smaller element type into the expression tree building our index.
17453 // TODO: We can generalize this and handle a bunch more cases if useful.
17454
17455 // Narrow a buildvector to the narrowest element type. This requires less
17456 // work and less register pressure at high LMUL, and creates smaller constants
17457 // which may be cheaper to materialize.
17458 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
17459 KnownBits Known = DAG.computeKnownBits(N);
17460 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
17461 LLVMContext &C = *DAG.getContext();
17462 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
17463 if (ResultVT.bitsLT(VT.getVectorElementType())) {
17464 N = DAG.getNode(ISD::TRUNCATE, DL,
17465 VT.changeVectorElementType(C, ResultVT), N);
17466 return true;
17467 }
17468 }
17469
17470 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
17471 if (N.getOpcode() != ISD::SHL)
17472 return false;
17473
17474 SDValue N0 = N.getOperand(0);
17475 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
17476 N0.getOpcode() != RISCVISD::VZEXT_VL)
17477 return false;
17478 if (!N0->hasOneUse())
17479 return false;
17480
17481 APInt ShAmt;
17482 SDValue N1 = N.getOperand(1);
17483 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
17484 return false;
17485
17486 SDValue Src = N0.getOperand(0);
17487 EVT SrcVT = Src.getValueType();
17488 unsigned SrcElen = SrcVT.getScalarSizeInBits();
17489 unsigned ShAmtV = ShAmt.getZExtValue();
17490 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
17491 NewElen = std::max(NewElen, 8U);
17492
17493 // Skip if NewElen is not narrower than the original extended type.
17494 if (NewElen >= N0.getValueType().getScalarSizeInBits())
17495 return false;
17496
17497 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
17498 EVT NewVT = SrcVT.changeVectorElementType(*DAG.getContext(), NewEltVT);
17499
17500 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
17501 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
17502 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
17503 return true;
17504}
17505
/// Try to map an integer comparison with size > XLEN to vector instructions
/// before type legalization splits it up into chunks.
static SDValue
                                const SDLoc &DL, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  EVT OpVT = X.getValueType();
  // We're looking for an oversized integer equality comparison.
  if (!OpVT.isScalarInteger())
    return SDValue();

  unsigned OpSize = OpVT.getSizeInBits();
  // The size should be larger than XLen and smaller than the maximum vector
  // size.
  if (OpSize <= Subtarget.getXLen() ||
      OpSize > Subtarget.getRealMinVLen() *
    return SDValue();

  // Don't perform this combine if constructing the vector will be expensive.
  auto IsVectorBitCastCheap = [](SDValue X) {
    return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
           X.getOpcode() == ISD::LOAD;
  };
  if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
    return SDValue();

    Attribute::NoImplicitFloat))
    return SDValue();

  // Bail out for non-byte-sized types.
  if (!OpVT.isByteSized())
    return SDValue();

  // Compare lane-wise as i8 vectors, then OR-reduce the "lanes differ" mask:
  // X == Y iff no byte differs.
  unsigned VecSize = OpSize / 8;
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
  EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);

  SDValue VecX = DAG.getBitcast(VecVT, X);
  SDValue VecY = DAG.getBitcast(VecVT, Y);
  SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
  SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);

  SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
                            DAG.getCondCode(ISD::SETNE), Mask, VL);
  // The reduction is non-zero iff some byte differed; compare it against zero
  // with the original predicate to produce the scalar result.
  return DAG.getSetCC(DL, VT,
                      DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
                                  DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
                                  VL),
                      DAG.getConstant(0, DL, XLenVT), CC);
}
17565
                                   const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
  // Looking for an equality compare.
  if (!isIntEqualitySetCC(Cond))
    return SDValue();

  // Oversized (> XLen) scalar equality may be lowered with vector ops.
  if (SDValue V =
          combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
      N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
    const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
    // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
    if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
        AndRHSC.isNegatedPowerOf2()) {
      unsigned ShiftBits = AndRHSC.countr_zero();
      SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
                                  DAG.getConstant(ShiftBits, dl, OpVT));
      return DAG.getSetCC(dl, VT, Shift, N1, Cond);
    }

    // Similar to above but handling the lower 32 bits by using sraiw. Allow
    // comparing with constants other than 0 if the constant can be folded into
    // addi or xori after shifting.
    uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
    uint64_t AndRHSInt = AndRHSC.getZExtValue();
    if (OpVT == MVT::i64 && isUInt<32>(AndRHSInt) &&
        isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
      unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
      int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
      // NewC == 2048 is allowed: equality against C can be checked with
      // addi/xori of -C, and -2048 is a valid simm12.
      if (NewC >= -2048 && NewC <= 2048) {
        SDValue SExt =
            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
                        DAG.getValueType(MVT::i32));
        SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
                                    DAG.getConstant(ShiftBits, dl, OpVT));
        return DAG.getSetCC(dl, VT, Shift,
                            DAG.getSignedConstant(NewC, dl, OpVT), Cond);
      }
    }
  }

  // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
  // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
  // bit 31. Same for setne. C1' may be cheaper to materialize and the
  // sext_inreg can become a sext.w instead of a shift pair.
  if (OpVT != MVT::i64 || !Subtarget.is64Bit())
    return SDValue();

  // RHS needs to be a constant.
  auto *N1C = dyn_cast<ConstantSDNode>(N1);
  if (!N1C)
    return SDValue();

  // LHS needs to be (and X, 0xffffffff).
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
      N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
    return SDValue();

  // Don't do this if the sign bit is provably zero, it will be turned back into
  // an AND.
  APInt SignMask = APInt::getOneBitSet(64, 31);
  if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
    return SDValue();

  const APInt &C1 = N1C->getAPIntValue();

  // If the constant is larger than 2^32 - 1 it is impossible for both sides
  // to be equal.
  if (C1.getActiveBits() > 32)
    return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

  // Note: the SDLoc here is implicitly constructed from N.
  SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
                               N0.getOperand(0), DAG.getValueType(MVT::i32));
  return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
                      dl, OpVT), Cond);
}
17655
/// Combines rooted at SIGN_EXTEND_INREG: fold the in-register sign extension
/// into an operation that already produces correctly sign-extended bits.
static SDValue
                                const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // The type we are sign-extending *from* (operand 1 is a VTSDNode).
  EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  unsigned Opc = Src.getOpcode();
  SDLoc DL(N);

  // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
  // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
  if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
      Subtarget.hasStdExtZfhmin())
    return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));

  // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32.
  // The known-bits query proves the shift amount fits in 5 bits.
  if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
      VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
      DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
    return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
                       Src.getOperand(1));

  // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
  if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
    return DAG.getNegative(Src, DL, VT);

  // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
  if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
      isAllOnesConstant(Src.getOperand(1)) &&
      Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
    return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
                       DAG.getAllOnesConstant(DL, VT));

  return SDValue();
}
17692
17693namespace {
17694// Forward declaration of the structure holding the necessary information to
17695// apply a combine.
17696struct CombineResult;
17697
// Bitmask of the extension kinds an operand can be folded through. The values
// are distinct powers of two so several kinds can be recorded for the same
// operand (e.g. a small splat constant supports both ZExt and SExt).
enum ExtKind : uint8_t {
  ZExt = 1 << 0,    // zero extension from the half-size integer type
  SExt = 1 << 1,    // sign extension from the half-size integer type
  FPExt = 1 << 2,   // FP extension from the half-size floating-point type
  BF16Ext = 1 << 3  // FP extension from bf16
};
/// Helper class for folding sign/zero extensions.
/// In particular, this class is used for the following combines:
/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
/// mul | mul_vl -> vwmul(u) | vwmul_su
/// shl | shl_vl -> vwsll
/// fadd -> vfwadd | vfwadd_w
/// fsub -> vfwsub | vfwsub_w
/// fmul -> vfwmul
/// An object of this class represents an operand of the operation we want to
/// combine.
/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
/// NodeExtensionHelper for `a` and one for `b`.
///
/// This class abstracts away how the extension is materialized and
/// how its number of users affect the combines.
///
/// In particular:
/// - VWADD_W is conceptually == add(op0, sext(op1))
/// - VWADDU_W == add(op0, zext(op1))
/// - VWSUB_W == sub(op0, sext(op1))
/// - VWSUBU_W == sub(op0, zext(op1))
/// - VFWADD_W == fadd(op0, fpext(op1))
/// - VFWSUB_W == fsub(op0, fpext(op1))
/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
/// zext|sext(smaller_value).
struct NodeExtensionHelper {
  /// Records if this operand is like being zero extended.
  bool SupportsZExt;
  /// Records if this operand is like being sign extended.
  /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
  /// instance, a splat constant (e.g., 3), would support being both sign and
  /// zero extended.
  bool SupportsSExt;
  /// Records if this operand is like being floating point extended.
  bool SupportsFPExt;
  /// Records if this operand is extended from bf16.
  bool SupportsBF16Ext;
  /// This boolean captures whether we care if this operand would still be
  /// around after the folding happens.
  bool EnforceOneUse;
  /// Original value that this NodeExtensionHelper represents.
  SDValue OrigOperand;

  /// Get the value feeding the extension or the value itself.
  /// E.g., for zext(a), this would return a.
  SDValue getSource() const {
    switch (OrigOperand.getOpcode()) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
    case RISCVISD::FP_EXTEND_VL:
      return OrigOperand.getOperand(0);
    default:
      return OrigOperand;
    }
  }

  /// Check if this instance represents a splat.
  bool isSplat() const {
    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
           OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
  }

  /// Get the extended opcode.
  unsigned getExtOpc(ExtKind SupportsExt) const {
    switch (SupportsExt) {
    case ExtKind::SExt:
      return RISCVISD::VSEXT_VL;
    case ExtKind::ZExt:
      return RISCVISD::VZEXT_VL;
    case ExtKind::FPExt:
    case ExtKind::BF16Ext:
      // Both regular FP and bf16 widening use the same extend node.
      return RISCVISD::FP_EXTEND_VL;
    }
    llvm_unreachable("Unknown ExtKind enum");
  }

  /// Get or create a value that can feed \p Root with the given extension \p
  /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
  /// operand. \see ::getSource().
  SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget,
                                std::optional<ExtKind> SupportsExt) const {
    if (!SupportsExt.has_value())
      return OrigOperand;

    MVT NarrowVT = getNarrowType(Root, *SupportsExt);

    SDValue Source = getSource();
    assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
    // Already at the narrow type: nothing to materialize.
    if (Source.getValueType() == NarrowVT)
      return Source;

    unsigned ExtOpc = getExtOpc(*SupportsExt);

    // If we need an extension, we should be changing the type.
    SDLoc DL(OrigOperand);
    auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
    switch (OrigOperand.getOpcode()) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
    case RISCVISD::FP_EXTEND_VL:
      return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
    case ISD::SPLAT_VECTOR:
      return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
    case RISCVISD::VMV_V_X_VL:
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
    case RISCVISD::VFMV_V_F_VL:
      // Peel the scalar FP_EXTEND and re-splat the narrow scalar directly.
      Source = Source.getOperand(1);
      assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
      Source = Source.getOperand(0);
      assert(Source.getValueType() == NarrowVT.getVectorElementType());
      return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source, VL);
    default:
      // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
      // and that operand should already have the right NarrowVT so no
      // extension should be required at this point.
      llvm_unreachable("Unsupported opcode");
    }
  }

  /// Helper function to get the narrow type for \p Root.
  /// The narrow type is the type of \p Root where we divided the size of each
  /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
  /// \pre Both the narrow type and the original type should be legal.
  static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
    MVT VT = Root->getSimpleValueType(0);

    // Determine the narrow size.
    unsigned NarrowSize = VT.getScalarSizeInBits() / 2;

    MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
                : SupportsExt == ExtKind::FPExt
                    ? MVT::getFloatingPointVT(NarrowSize)
                    : MVT::getIntegerVT(NarrowSize);

    assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
           "Trying to extend something we can't represent");
    MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
    return NarrowVT;
  }

  /// Get the opcode to materialize:
  /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
  static unsigned getSExtOpcode(unsigned Opcode) {
    switch (Opcode) {
    case ISD::ADD:
    case RISCVISD::ADD_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case ISD::OR:
    case RISCVISD::OR_VL:
      return RISCVISD::VWADD_VL;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      return RISCVISD::VWSUB_VL;
    case ISD::MUL:
    case RISCVISD::MUL_VL:
      return RISCVISD::VWMUL_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get the opcode to materialize:
  /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
  static unsigned getZExtOpcode(unsigned Opcode) {
    switch (Opcode) {
    case ISD::ADD:
    case RISCVISD::ADD_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case ISD::OR:
    case RISCVISD::OR_VL:
      return RISCVISD::VWADDU_VL;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
      return RISCVISD::VWSUBU_VL;
    case ISD::MUL:
    case RISCVISD::MUL_VL:
      return RISCVISD::VWMULU_VL;
    case ISD::SHL:
    case RISCVISD::SHL_VL:
      return RISCVISD::VWSLL_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get the opcode to materialize:
  /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
  static unsigned getFPExtOpcode(unsigned Opcode) {
    switch (Opcode) {
    case RISCVISD::FADD_VL:
    case RISCVISD::VFWADD_W_VL:
      return RISCVISD::VFWADD_VL;
    case RISCVISD::FSUB_VL:
    case RISCVISD::VFWSUB_W_VL:
      return RISCVISD::VFWSUB_VL;
    case RISCVISD::FMUL_VL:
      return RISCVISD::VFWMUL_VL;
    case RISCVISD::VFMADD_VL:
      return RISCVISD::VFWMADD_VL;
    case RISCVISD::VFMSUB_VL:
      return RISCVISD::VFWMSUB_VL;
    case RISCVISD::VFNMADD_VL:
      return RISCVISD::VFWNMADD_VL;
    case RISCVISD::VFNMSUB_VL:
      return RISCVISD::VFWNMSUB_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
  /// newOpcode(a, b).
  static unsigned getSUOpcode(unsigned Opcode) {
    assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
           "SU is only supported for MUL");
    return RISCVISD::VWMULSU_VL;
  }

  /// Get the opcode to materialize
  /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
  static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
    switch (Opcode) {
    case ISD::ADD:
    case RISCVISD::ADD_VL:
    case ISD::OR:
    case RISCVISD::OR_VL:
      return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
                                          : RISCVISD::VWADDU_W_VL;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
      return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
                                          : RISCVISD::VWSUBU_W_VL;
    case RISCVISD::FADD_VL:
      return RISCVISD::VFWADD_W_VL;
    case RISCVISD::FSUB_VL:
      return RISCVISD::VFWSUB_W_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Signature of a candidate-combine callback; it returns std::nullopt when
  /// the combine does not match. \see getSupportedFoldings.
  using CombineToTry = std::function<std::optional<CombineResult>(
      SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
      const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
      const RISCVSubtarget &)>;

  /// Check if this node needs to be fully folded or extended for all users.
  bool needToPromoteOtherUsers() const { return EnforceOneUse; }

  /// Infer extension support for a splat operand (SPLAT_VECTOR or
  /// VMV_V_X_VL) by inspecting the known bits of the splatted scalar.
  void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
    unsigned Opc = OrigOperand.getOpcode();
    MVT VT = OrigOperand.getSimpleValueType();

    assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
           "Unexpected Opcode");

    // The pasthru must be undef for tail agnostic.
    if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
      return;

    // Get the scalar value.
    SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
                                          : OrigOperand.getOperand(1);

    // See if we have enough sign bits or zero bits in the scalar to use a
    // widening opcode by splatting to smaller element size.
    unsigned EltBits = VT.getScalarSizeInBits();
    unsigned ScalarBits = Op.getValueSizeInBits();
    // If we're not getting all bits from the element, we need special handling.
    if (ScalarBits < EltBits) {
      // This should only occur on RV32.
      assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
             !Subtarget.is64Bit() && "Unexpected splat");
      // vmv.v.x sign extends narrow inputs.
      SupportsSExt = true;

      // If the input is positive, then sign extend is also zero extend.
      if (DAG.SignBitIsZero(Op))
        SupportsZExt = true;

      EnforceOneUse = false;
      return;
    }

    unsigned NarrowSize = EltBits / 2;
    // If the narrow type cannot be expressed with a legal VMV,
    // this is not a valid candidate.
    if (NarrowSize < 8)
      return;

    if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
      SupportsSExt = true;

    if (DAG.MaskedValueIsZero(Op,
                              APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
      SupportsZExt = true;

    EnforceOneUse = false;
  }

  /// Whether \p NarrowEltVT is a valid source element type for the FP
  /// widening folds on this subtarget.
  bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
    return (NarrowEltVT == MVT::f32 ||
            (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
  }

  /// Whether a bf16 source may feed the widening folds on this subtarget.
  bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
    return NarrowEltVT == MVT::bf16 &&
           (Subtarget.hasStdExtZvfbfwma() || Subtarget.hasVInstructionsBF16());
  }

  /// Helper method to set the various fields of this struct based on the
  /// type of \p Root.
  void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) {
    SupportsZExt = false;
    SupportsSExt = false;
    SupportsFPExt = false;
    SupportsBF16Ext = false;
    EnforceOneUse = true;
    unsigned Opc = OrigOperand.getOpcode();
    // For the nodes we handle below, we end up using their inputs directly: see
    // getSource(). However since they either don't have a passthru or we check
    // that their passthru is undef, we can safely ignore their mask and VL.
    switch (Opc) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      MVT VT = OrigOperand.getSimpleValueType();
      if (!VT.isVector())
        break;

      SDValue NarrowElt = OrigOperand.getOperand(0);
      MVT NarrowVT = NarrowElt.getSimpleValueType();
      // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
      if (NarrowVT.getVectorElementType() == MVT::i1)
        break;

      SupportsZExt = Opc == ISD::ZERO_EXTEND;
      SupportsSExt = Opc == ISD::SIGN_EXTEND;
      break;
    }
    case RISCVISD::VZEXT_VL:
      SupportsZExt = true;
      break;
    case RISCVISD::VSEXT_VL:
      SupportsSExt = true;
      break;
    case RISCVISD::FP_EXTEND_VL: {
      MVT NarrowEltVT =
      if (isSupportedFPExtend(NarrowEltVT, Subtarget))
        SupportsFPExt = true;
      if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
        SupportsBF16Ext = true;

      break;
    }
    case ISD::SPLAT_VECTOR:
    case RISCVISD::VMV_V_X_VL:
      fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
      break;
    case RISCVISD::VFMV_V_F_VL: {
      MVT VT = OrigOperand.getSimpleValueType();

      // The passthru must be undef for the fold to be valid.
      if (!OrigOperand.getOperand(0).isUndef())
        break;

      SDValue Op = OrigOperand.getOperand(1);
      if (Op.getOpcode() != ISD::FP_EXTEND)
        break;

      // The scalar extend must be exactly half-width -> full-width.
      unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
      unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
      if (NarrowSize != ScalarBits)
        break;

      if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
        SupportsFPExt = true;
      if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
                                Subtarget))
        SupportsBF16Ext = true;
      break;
    }
    default:
      break;
    }
  }

  /// Check if \p Root supports any extension folding combines.
  static bool isSupportedRoot(const SDNode *Root,
                              const RISCVSubtarget &Subtarget) {
    switch (Root->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL: {
      return Root->getValueType(0).isScalableVector();
    }
    case ISD::OR: {
      // OR only folds when it is provably disjoint (i.e. acts as ADD).
      return Root->getValueType(0).isScalableVector() &&
             Root->getFlags().hasDisjoint();
    }
    // Vector Widening Integer Add/Sub/Mul Instructions
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
    // Vector Widening Floating-Point Add/Sub/Mul Instructions
    case RISCVISD::FADD_VL:
    case RISCVISD::FSUB_VL:
    case RISCVISD::FMUL_VL:
    case RISCVISD::VFWADD_W_VL:
    case RISCVISD::VFWSUB_W_VL:
      return true;
    case RISCVISD::OR_VL:
      return Root->getFlags().hasDisjoint();
    case ISD::SHL:
      // vwsll requires Zvbb.
      return Root->getValueType(0).isScalableVector() &&
             Subtarget.hasStdExtZvbb();
    case RISCVISD::SHL_VL:
      return Subtarget.hasStdExtZvbb();
    case RISCVISD::VFMADD_VL:
    case RISCVISD::VFNMSUB_VL:
    case RISCVISD::VFNMADD_VL:
    case RISCVISD::VFMSUB_VL:
      return true;
    default:
      return false;
    }
  }

  /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
  NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
    assert(isSupportedRoot(Root, Subtarget) &&
           "Trying to build an helper with an "
           "unsupported root");
    assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
    OrigOperand = Root->getOperand(OperandIdx);

    unsigned Opc = Root->getOpcode();
    switch (Opc) {
    // We consider
    // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
    // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
    // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
    case RISCVISD::VFWADD_W_VL:
    case RISCVISD::VFWSUB_W_VL:
      // Operand 1 can't be changed.
      if (OperandIdx == 1)
        break;
      [[fallthrough]];
    default:
      fillUpExtensionSupport(Root, DAG, Subtarget);
      break;
    }
  }

  /// Helper function to get the Mask and VL from \p Root.
  static std::pair<SDValue, SDValue>
  getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
               const RISCVSubtarget &Subtarget) {
    assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
    switch (Root->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::OR:
    case ISD::SHL: {
      // Generic ISD nodes carry no mask/VL; synthesize the defaults.
      SDLoc DL(Root);
      MVT VT = Root->getSimpleValueType(0);
      return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
    }
    default:
      // RISCVISD *_VL nodes carry mask and VL as operands 3 and 4.
      return std::make_pair(Root->getOperand(3), Root->getOperand(4));
    }
  }

  /// Helper function to check if \p N is commutative with respect to the
  /// foldings that are supported by this class.
  static bool isCommutative(const SDNode *N) {
    switch (N->getOpcode()) {
    case ISD::ADD:
    case ISD::MUL:
    case ISD::OR:
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::OR_VL:
    case RISCVISD::FADD_VL:
    case RISCVISD::FMUL_VL:
    case RISCVISD::VFMADD_VL:
    case RISCVISD::VFNMSUB_VL:
    case RISCVISD::VFNMADD_VL:
    case RISCVISD::VFMSUB_VL:
      return true;
    case RISCVISD::VWADD_W_VL:
    case RISCVISD::VWADDU_W_VL:
    case ISD::SUB:
    case RISCVISD::SUB_VL:
    case RISCVISD::VWSUB_W_VL:
    case RISCVISD::VWSUBU_W_VL:
    case RISCVISD::VFWADD_W_VL:
    case RISCVISD::FSUB_VL:
    case RISCVISD::VFWSUB_W_VL:
    case ISD::SHL:
    case RISCVISD::SHL_VL:
      return false;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get a list of combine to try for folding extensions in \p Root.
  /// Note that each returned CombineToTry function doesn't actually modify
  /// anything. Instead they produce an optional CombineResult that if not None,
  /// need to be materialized for the combine to be applied.
  /// \see CombineResult::materialize.
  /// If the related CombineToTry function returns std::nullopt, that means the
  /// combine didn't match.
  getSupportedFoldings(const SDNode *Root, const RISCVSubtarget &Subtarget);
};
18247
18248/// Helper structure that holds all the necessary information to materialize a
18249/// combine that does some extension folding.
struct CombineResult {
  /// Opcode to be generated when materializing the combine.
  unsigned TargetOpcode;
  // No value means no extension is needed.
  std::optional<ExtKind> LHSExt;
  // No value means no extension is needed.
  std::optional<ExtKind> RHSExt;
  /// Root of the combine.
  SDNode *Root;
  /// LHS of the TargetOpcode.
  NodeExtensionHelper LHS;
  /// RHS of the TargetOpcode.
  NodeExtensionHelper RHS;

  CombineResult(unsigned TargetOpcode, SDNode *Root,
                const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
                const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
      : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
        LHS(LHS), RHS(RHS) {}

  /// Return a value that uses TargetOpcode and that can be used to replace
  /// Root.
  /// The actual replacement is *not* done in that method.
  SDValue materialize(SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) const {
    SDValue Mask, VL, Passthru;
    std::tie(Mask, VL) =
        NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
    switch (Root->getOpcode()) {
    default:
      // *_VL roots already carry an explicit passthru at operand 2; reuse it.
      Passthru = Root->getOperand(2);
      break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::OR:
    case ISD::SHL:
      // Generic ISD nodes have no passthru operand, so the new VL node gets
      // an undef passthru.
      Passthru = DAG.getUNDEF(Root->getValueType(0));
      break;
    }
    // The new node always has the VL-node operand layout:
    // (LHS, RHS, Passthru, Mask, VL), with each side extended on demand.
    return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
                       LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
                       RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
                       Passthru, Mask, VL);
  }
};
18295
18296/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
18297/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
18298/// are zext) and LHS and RHS can be folded into Root.
18299/// AllowExtMask define which form `ext` can take in this pattern.
18300///
18301/// \note If the pattern can match with both zext and sext, the returned
18302/// CombineResult will feature the zext result.
18303///
18304/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18305/// can be used to apply the pattern.
18306static std::optional<CombineResult>
18307canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
18308 const NodeExtensionHelper &RHS,
18309 uint8_t AllowExtMask, SelectionDAG &DAG,
18310 const RISCVSubtarget &Subtarget) {
18311 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
18312 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
18313 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
18314 /*RHSExt=*/{ExtKind::ZExt});
18315 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
18316 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
18317 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18318 /*RHSExt=*/{ExtKind::SExt});
18319 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
18320 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18321 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
18322 /*RHSExt=*/{ExtKind::FPExt});
18323 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
18324 RHS.SupportsBF16Ext)
18325 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18326 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
18327 /*RHSExt=*/{ExtKind::BF16Ext});
18328 return std::nullopt;
18329}
18330
18331/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
18332/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
18333/// are zext) and LHS and RHS can be folded into Root.
18334///
18335/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18336/// can be used to apply the pattern.
18337static std::optional<CombineResult>
18338canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
18339 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18340 const RISCVSubtarget &Subtarget) {
18341 return canFoldToVWWithSameExtensionImpl(
18342 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
18343 Subtarget);
18344}
18345
18346/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
18347///
18348/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18349/// can be used to apply the pattern.
18350static std::optional<CombineResult>
18351canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18352 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18353 const RISCVSubtarget &Subtarget) {
18354 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
18355 Subtarget);
18356}
18357
18358/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
18359///
18360/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18361/// can be used to apply the pattern.
18362static std::optional<CombineResult>
18363canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
18364 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18365 const RISCVSubtarget &Subtarget) {
18366 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
18367 Subtarget);
18368}
18369
18370/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
18371///
18372/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18373/// can be used to apply the pattern.
18374static std::optional<CombineResult>
18375canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
18376 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18377 const RISCVSubtarget &Subtarget) {
18378 if (RHS.SupportsFPExt)
18379 return CombineResult(
18380 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
18381 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
18382
18383 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
18384 // sext/zext?
18385 // Control this behavior behind an option (AllowSplatInVW_W) for testing
18386 // purposes.
18387 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
18388 return CombineResult(
18389 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
18390 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
18391 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
18392 return CombineResult(
18393 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
18394 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
18395 return std::nullopt;
18396}
18397
18398/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
18399///
18400/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18401/// can be used to apply the pattern.
18402static std::optional<CombineResult>
18403canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18404 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18405 const RISCVSubtarget &Subtarget) {
18406 if (LHS.SupportsSExt)
18407 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
18408 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18409 /*RHSExt=*/std::nullopt);
18410 return std::nullopt;
18411}
18412
18413/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
18414///
18415/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18416/// can be used to apply the pattern.
18417static std::optional<CombineResult>
18418canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18419 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18420 const RISCVSubtarget &Subtarget) {
18421 if (LHS.SupportsZExt)
18422 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
18423 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
18424 /*RHSExt=*/std::nullopt);
18425 return std::nullopt;
18426}
18427
18428/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
18429///
18430/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18431/// can be used to apply the pattern.
18432static std::optional<CombineResult>
18433canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18434 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18435 const RISCVSubtarget &Subtarget) {
18436 if (LHS.SupportsFPExt)
18437 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18438 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
18439 /*RHSExt=*/std::nullopt);
18440 return std::nullopt;
18441}
18442
18443/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
18444///
18445/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18446/// can be used to apply the pattern.
18447static std::optional<CombineResult>
18448canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
18449 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18450 const RISCVSubtarget &Subtarget) {
18451
18452 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
18453 return std::nullopt;
18454 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
18455 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18456 /*RHSExt=*/{ExtKind::ZExt});
18457}
18458
// NOTE(review): the return-type line of this definition appears to have been
// lost in text extraction (upstream it is
// `SmallVector<NodeExtensionHelper::CombineToTry>`) — confirm against the
// declaration inside the struct.
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root,
                                          const RISCVSubtarget &Subtarget) {
  SmallVector<CombineToTry> Strategies;
  switch (Root->getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::OR:
  case RISCVISD::ADD_VL:
  case RISCVISD::SUB_VL:
  case RISCVISD::OR_VL:
  case RISCVISD::FADD_VL:
  case RISCVISD::FSUB_VL:
    // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
    Strategies.push_back(canFoldToVWWithSameExtension);
    // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
    Strategies.push_back(canFoldToVW_W);
    break;
  case RISCVISD::FMUL_VL:
  case RISCVISD::VFMADD_VL:
  case RISCVISD::VFMSUB_VL:
  case RISCVISD::VFNMADD_VL:
  case RISCVISD::VFNMSUB_VL:
    Strategies.push_back(canFoldToVWWithSameExtension);
    // bf16 widening is gated on the subtarget: Zvfbfa enables it for all the
    // FMA flavors (not FMUL), Zvfbfwma only for plain VFMADD.
    if (Subtarget.hasStdExtZvfbfa() && Root->getOpcode() != RISCVISD::FMUL_VL)
      // TODO: Once other widen operations are supported we can merge
      // canFoldToVWWithSameExtension and canFoldToVWWithSameExtBF16.
      Strategies.push_back(canFoldToVWWithSameExtBF16);
    else if (Subtarget.hasStdExtZvfbfwma() &&
             Root->getOpcode() == RISCVISD::VFMADD_VL)
      Strategies.push_back(canFoldToVWWithSameExtBF16);
    break;
  case ISD::MUL:
  case RISCVISD::MUL_VL:
    // mul -> vwmul(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // mul -> vwmulsu
    Strategies.push_back(canFoldToVW_SU);
    break;
  case ISD::SHL:
  case RISCVISD::SHL_VL:
    // shl -> vwsll
    Strategies.push_back(canFoldToVWWithSameExtZEXT);
    break;
  case RISCVISD::VWADD_W_VL:
  case RISCVISD::VWSUB_W_VL:
    // vwadd_w|vwsub_w -> vwadd|vwsub
    Strategies.push_back(canFoldToVWWithSEXT);
    break;
  case RISCVISD::VWADDU_W_VL:
  case RISCVISD::VWSUBU_W_VL:
    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
  case RISCVISD::VFWADD_W_VL:
  case RISCVISD::VFWSUB_W_VL:
    // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
    Strategies.push_back(canFoldToVWWithFPEXT);
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return Strategies;
}
18523} // End anonymous namespace.
18524
  // TODO: Extend this to other binops using generic identity logic
  assert(N->getOpcode() == RISCVISD::ADD_VL);
  SDValue A = N->getOperand(0);
  SDValue B = N->getOperand(1);
  SDValue Passthru = N->getOperand(2);
  if (!Passthru.isUndef())
    // TODO:This could be a vmerge instead
    return SDValue();
  ;
  // NOTE(review): the guard for this `return A;` (presumably a check that B
  // is an all-zeros splat, matching the INSERT_SUBVECTOR case below) appears
  // to have been lost in text extraction, together with this function's
  // signature line above — confirm against upstream.
  return A;
  // Peek through fixed to scalable
  if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
      ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
    return A;
  return SDValue();
}
18543
18544/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
18545/// The supported combines are:
18546/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
18547/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
18548/// mul | mul_vl -> vwmul(u) | vwmul_su
18549/// shl | shl_vl -> vwsll
18550/// fadd_vl -> vfwadd | vfwadd_w
18551/// fsub_vl -> vfwsub | vfwsub_w
18552/// fmul_vl -> vfwmul
18553/// vwadd_w(u) -> vwadd(u)
18554/// vwsub_w(u) -> vwsub(u)
18555/// vfwadd_w -> vfwadd
18556/// vfwsub_w -> vfwsub
                                     const RISCVSubtarget &Subtarget) {
  // NOTE(review): the first signature line(s) of this function were lost in
  // text extraction; upstream this is
  // `static SDValue combineOp_VLToVWOp_VL(SDNode *N,
  //     TargetLowering::DAGCombinerInfo &DCI, ...)` — confirm.
  SelectionDAG &DAG = DCI.DAG;
  if (DCI.isBeforeLegalize())
    return SDValue();

  if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
    return SDValue();

  // Worklist of roots whose operands we may widen; Inserted caps the total
  // web size, ExtensionsToRemove records extensions every combine must fold.
  SmallVector<SDNode *> Worklist;
  SmallPtrSet<SDNode *, 8> Inserted;
  SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
  Worklist.push_back(N);
  Inserted.insert(N);
  SmallVector<CombineResult> CombinesToApply;

  while (!Worklist.empty()) {
    SDNode *Root = Worklist.pop_back_val();

    NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
    NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
    // If folding Op's extension requires promoting its other users too,
    // queue those users; returns false if any user cannot be promoted.
    auto AppendUsersIfNeeded =
        [&Worklist, &Subtarget, &Inserted,
         &ExtensionsToRemove](const NodeExtensionHelper &Op) {
          if (Op.needToPromoteOtherUsers()) {
            // Remember that we're supposed to remove this extension.
            ExtensionsToRemove.insert(Op.OrigOperand.getNode());
            for (SDUse &Use : Op.OrigOperand->uses()) {
              SDNode *TheUser = Use.getUser();
              if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
                return false;
              // We only support the first 2 operands of FMA.
              if (Use.getOperandNo() >= 2)
                return false;
              if (Inserted.insert(TheUser).second)
                Worklist.push_back(TheUser);
            }
          }
          return true;
        };

    // Control the compile time by limiting the number of node we look at in
    // total.
    if (Inserted.size() > ExtensionMaxWebSize)
      return SDValue();

    // NOTE(review): the declaration line here (upstream:
    // `SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =`)
    // appears to have been lost in text extraction — confirm.
        NodeExtensionHelper::getSupportedFoldings(Root, Subtarget);

    assert(!FoldingStrategies.empty() && "Nothing to be folded");
    bool Matched = false;
    // Try each strategy; for commutative roots retry once with the operands
    // swapped (hence the `1 + isCommutative(Root)` attempt bound).
    for (int Attempt = 0;
         (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
         ++Attempt) {

      for (NodeExtensionHelper::CombineToTry FoldingStrategy :
           FoldingStrategies) {
        std::optional<CombineResult> Res =
            FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
        if (Res) {
          // If this strategy wouldn't remove an extension we're supposed to
          // remove, reject it.
          if (!Res->LHSExt.has_value() &&
              ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
            continue;
          if (!Res->RHSExt.has_value() &&
              ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
            continue;

          Matched = true;
          CombinesToApply.push_back(*Res);
          // All the inputs that are extended need to be folded, otherwise
          // we would be leaving the old input (since it is may still be used),
          // and the new one.
          if (Res->LHSExt.has_value())
            if (!AppendUsersIfNeeded(LHS))
              return SDValue();
          if (Res->RHSExt.has_value())
            if (!AppendUsersIfNeeded(RHS))
              return SDValue();
          break;
        }
      }
      std::swap(LHS, RHS);
    }
    // Right now we do an all or nothing approach.
    if (!Matched)
      return SDValue();
  }
  // Store the value for the replacement of the input node separately.
  SDValue InputRootReplacement;
  // We do the RAUW after we materialize all the combines, because some replaced
  // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
  // some of these nodes may appear in the NodeExtensionHelpers of some of the
  // yet-to-be-visited CombinesToApply roots.
  // NOTE(review): the declaration here (upstream:
  // `SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;`) appears to
  // have been lost in text extraction — confirm.
  ValuesToReplace.reserve(CombinesToApply.size());
  for (CombineResult Res : CombinesToApply) {
    SDValue NewValue = Res.materialize(DAG, Subtarget);
    if (!InputRootReplacement) {
      assert(Res.Root == N &&
             "First element is expected to be the current node");
      InputRootReplacement = NewValue;
    } else {
      ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
    }
  }
  for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
    DCI.CombineTo(OldNewValues.first.getNode(), OldNewValues.second);
  }
  return InputRootReplacement;
}
18670
18671// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
18672// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
18673// y will be the Passthru and cond will be the Mask.
  // NOTE(review): this function's signature line was lost in text extraction;
  // upstream it is
  // `static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {`
  // — confirm.
  unsigned Opc = N->getOpcode();
  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);

  SDValue Y = N->getOperand(0);
  SDValue MergeOp = N->getOperand(1);
  unsigned MergeOpc = MergeOp.getOpcode();

  // Operand 1 must be a select of X over an all-zeros false value.
  if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
    return SDValue();

  // True value of the merge/select.
  SDValue X = MergeOp->getOperand(1);

  // The merge must disappear after the fold, so it may have no other users.
  if (!MergeOp.hasOneUse())
    return SDValue();

  // Passthru should be undef
  SDValue Passthru = N->getOperand(2);
  if (!Passthru.isUndef())
    return SDValue();

  // Mask should be all ones
  SDValue Mask = N->getOperand(3);
  if (Mask.getOpcode() != RISCVISD::VMSET_VL)
    return SDValue();

  // False value of MergeOp should be all zeros
  SDValue Z = MergeOp->getOperand(2);

  // Look through a fixed-length-to-scalable insert of a zero splat.
  if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
      (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
    Z = Z.getOperand(1);

  if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
    return SDValue();

  // Rebuild with Y as the passthru and the merge's condition as the mask:
  // masked-off lanes keep Y, which matches selecting 0 for the RHS.
  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
                     {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
                     N->getFlags());
}
18715
                                          const RISCVSubtarget &Subtarget) {
  // NOTE(review): the first signature line(s) were lost in text extraction;
  // upstream this is `static SDValue performVWADDSUBW_VLCombine(SDNode *N,
  //     TargetLowering::DAGCombinerInfo &DCI, ...)` — confirm.
  [[maybe_unused]] unsigned Opc = N->getOpcode();
  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);

  // First try folding the narrow operand's extension into a full widening op.
  if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
    return V;

  // Otherwise try folding a zero-select on the narrow operand into the mask.
  return combineVWADDSUBWSelect(N, DCI.DAG);
}
18728
18729// Helper function for performMemPairCombine.
18730// Try to combine the memory loads/stores LSNode1 and LSNode2
18731// into a single memory pair operation.
                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
                                 uint64_t Imm) {
  // NOTE(review): text extraction lost the first signature line (upstream:
  // `static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode
  // *LSNode1,`) and the `SmallPtrSet<const SDNode *, 32> Visited;`
  // declaration just below — confirm against upstream.
  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};

  // Bail if one node is a chain predecessor of the other: pairing them would
  // create a cycle.
  if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
      SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
    return SDValue();

  // NOTE(review): `MachineFunction &MF = DAG.getMachineFunction();` appears
  // to have been lost in text extraction here — confirm.
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // The new operation has twice the width.
  MVT XLenVT = Subtarget.getXLenVT();
  EVT MemVT = LSNode1->getMemoryVT();
  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
  MachineMemOperand *MMO = LSNode1->getMemOperand();
  // NOTE(review): the line `MachineMemOperand *NewMMO =
  // MF.getMachineMemOperand(` appears to have been lost in text extraction
  // here — confirm.
      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);

  if (LSNode1->getOpcode() == ISD::LOAD) {
    auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
    unsigned Opcode;
    // i32 pairs use TH_LWD/TH_LWUD (signed/unsigned extend); i64 uses TH_LDD.
    if (MemVT == MVT::i32)
      Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
    else
      Opcode = RISCVISD::TH_LDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
        {LSNode1->getChain(), BasePtr,
         DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    // Split the pair's two results (+ chain) back out for the two loads.
    SDValue Node1 =
        DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
    SDValue Node2 =
        DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));

    DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
    return Node1;
  } else {
    unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
         BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
    return Res;
  }
}
18787
18788// Try to combine two adjacent loads/stores to a single pair instruction from
18789// the XTHeadMemPair vendor extension.
  // NOTE(review): the signature line(s) (upstream: `static SDValue
  // performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {`)
  // were lost in text extraction — confirm.
  SelectionDAG &DAG = DCI.DAG;
  // NOTE(review): `MachineFunction &MF = DAG.getMachineFunction();` appears
  // to have been lost in text extraction here — confirm.
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // Target does not support load/store pair.
  if (!Subtarget.hasVendorXTHeadMemPair())
    return SDValue();

  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
  EVT MemVT = LSNode1->getMemoryVT();
  // Pointer operand index: 1 for loads, 2 for stores.
  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;

  // No volatile, indexed or atomic loads/stores.
  if (!LSNode1->isSimple() || LSNode1->isIndexed())
    return SDValue();

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
    if (Ptr->getOpcode() == ISD::ADD)
      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
        return {Ptr->getOperand(0), C1->getZExtValue()};
    return {Ptr, 0};
  };

  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));

  // Scan other users of the same chain for a mergeable sibling access.
  SDValue Chain = N->getOperand(0);
  for (SDUse &Use : Chain->uses()) {
    if (Use.getUser() != N && Use.getResNo() == 0 &&
        Use.getUser()->getOpcode() == N->getOpcode()) {
      // NOTE(review): the declaration `LSBaseSDNode *LSNode2 =
      // cast<LSBaseSDNode>(Use.getUser());` appears to have been lost in
      // text extraction here — confirm.

      // No volatile, indexed or atomic loads/stores.
      if (!LSNode2->isSimple() || LSNode2->isIndexed())
        continue;

      // Check if LSNode1 and LSNode2 have the same type and extension.
      if (LSNode1->getOpcode() == ISD::LOAD)
        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
            // NOTE(review): the continuation line (upstream:
            // `cast<LoadSDNode>(LSNode1)->getExtensionType())`) appears to
            // have been lost in text extraction here — confirm.
          continue;

      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
        continue;

      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));

      // Check if the base pointer is the same for both instruction.
      if (Base1 != Base2)
        continue;

      // Check if the offsets match the XTHeadMemPair encoding constraints.
      bool Valid = false;
      if (MemVT == MVT::i32) {
        // Check for adjacent i32 values and a 2-bit index.
        if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
          Valid = true;
      } else if (MemVT == MVT::i64) {
        // Check for adjacent i64 values and a 2-bit index.
        if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
          Valid = true;
      }

      if (!Valid)
        continue;

      // Try to combine.
      if (SDValue Res =
              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
        return Res;
    }
  }

  return SDValue();
}
18867
18868// Fold
18869// (fp_to_int (froundeven X)) -> fcvt X, rne
18870// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18871// (fp_to_int (ffloor X)) -> fcvt X, rdn
18872// (fp_to_int (fceil X)) -> fcvt X, rup
18873// (fp_to_int (fround X)) -> fcvt X, rmm
18874// (fp_to_int (frint X)) -> fcvt X
                                       const RISCVSubtarget &Subtarget) {
  // NOTE(review): the first signature line(s) were lost in text extraction;
  // upstream this is `static SDValue performFP_TO_INTCombine(SDNode *N,
  //     TargetLowering::DAGCombinerInfo &DCI, ...)` — confirm.
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  // Map the rounding operation (ftrunc/ffloor/...) to a static rounding mode.
  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  // If the result is invalid, we didn't find a foldable instruction.
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  SDLoc DL(N);
  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
  EVT VT = N->getValueType(0);

  if (VT.isVector() && TLI.isTypeLegal(VT)) {
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcContainerVT = SrcVT;
    MVT ContainerVT = VT.getSimpleVT();
    SDValue XVal = Src.getOperand(0);

    // For widening and narrowing conversions we just combine it into a
    // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
    // end up getting lowered to their appropriate pseudo instructions based on
    // their operand types
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
        VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
      return SDValue();

    // Make fixed-length vectors scalable first
    if (SrcVT.isFixedLengthVector()) {
      SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
      XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
      ContainerVT =
          getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
    }

    auto [Mask, VL] =
        getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);

    SDValue FpToInt;
    if (FRM == RISCVFPRndMode::RTZ) {
      // Use the dedicated trunc static rounding mode if we're truncating so we
      // don't need to generate calls to fsrmi/fsrm
      unsigned Opc =
          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
    } else {
      unsigned Opc =
          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    }

    // If converted from fixed-length to scalable, convert back
    if (VT.isFixedLengthVector())
      FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);

    return FpToInt;
  }

  // Only handle XLen or i32 types. Other types narrower than XLen will
  // eventually be legalized to XLenVT.
  if (VT != MVT::i32 && VT != XLenVT)
    return SDValue();

  unsigned Opc;
  if (VT == XLenVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;

  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                DAG.getTargetConstant(FRM, DL, XLenVT));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
}
18966
18967// Fold
18968// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18969// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18970// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18971// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18972// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18973// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
                                           const RISCVSubtarget &Subtarget) {
  // NOTE(review): the first signature line(s) were lost in text extraction;
  // upstream this is `static SDValue performFP_TO_INT_SATCombine(SDNode *N,
  //     TargetLowering::DAGCombinerInfo &DCI, ...)` — confirm.
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  // Only handle XLen types. Other types narrower than XLen will eventually be
  // legalized to XLenVT.
  EVT DstVT = N->getValueType(0);
  if (DstVT != XLenVT)
    return SDValue();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is also legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();

  // Map the rounding operation (ftrunc/ffloor/...) to a static rounding mode.
  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;

  unsigned Opc;
  if (SatVT == DstVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
  else
    return SDValue();
  // FIXME: Support other SatVTs by clamping before or after the conversion.

  Src = Src.getOperand(0);

  SDLoc DL(N);
  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
                                DAG.getTargetConstant(FRM, DL, XLenVT));

  // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
  // extend.
  if (Opc == RISCVISD::FCVT_WU_RV64)
    FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);

  // RISC-V FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan.
  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
19034
19035// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
19036// smaller than XLenVT.
                                        const RISCVSubtarget &Subtarget) {
  // NOTE(review): the first signature line (upstream: `static SDValue
  // performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,`) was lost in
  // text extraction — confirm.
  assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");

  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != ISD::BSWAP)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
      // NOTE(review): the last half of this condition (upstream:
      // `!llvm::has_single_bit(VT.getSizeInBits()))`) was lost in text
      // extraction — confirm.
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
}
19053
                                        const RISCVSubtarget &Subtarget) {
  // NOTE(review): the first signature line (upstream: `static SDValue
  // performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,`) was lost in
  // text extraction — confirm.
  // Fold:
  // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)

  // Check if its first operand is a vp.load.
  auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
  if (!VPLoad)
    return SDValue();

  EVT LoadVT = VPLoad->getValueType(0);
  // We do not have a strided_load version for masks, and the evl of vp.reverse
  // and vp.load should always be the same.
  if (!LoadVT.getVectorElementType().isByteSized() ||
      N->getOperand(2) != VPLoad->getVectorLength() ||
      !N->getOperand(0).hasOneUse())
    return SDValue();

  // Check if the mask of outer vp.reverse are all 1's.
  if (!isOneOrOneSplat(N->getOperand(1)))
    return SDValue();

  SDValue LoadMask = VPLoad->getMask();
  // If Mask is all ones, then load is unmasked and can be reversed.
  if (!isOneOrOneSplat(LoadMask)) {
    // If the mask is not all ones, we can reverse the load if the mask was also
    // reversed by an unmasked vp.reverse with the same EVL.
    if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
        !isOneOrOneSplat(LoadMask.getOperand(1)) ||
        LoadMask.getOperand(2) != VPLoad->getVectorLength())
      return SDValue();
    LoadMask = LoadMask.getOperand(0);
  }

  // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
  SDLoc DL(N);
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue NumElem = VPLoad->getVectorLength();
  uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;

  SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
                              DAG.getConstant(1, DL, XLenVT));
  SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
                              DAG.getConstant(ElemWidthByte, DL, XLenVT));
  SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
  // Negative stride walks the elements back-to-front.
  SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);

  // NOTE(review): the declarations of `MF` and `MMO` (upstream:
  // `MachineFunction &MF = DAG.getMachineFunction();` and
  // `MachineMemOperand *MMO = MF.getMachineMemOperand(`) appear to have been
  // lost in text extraction around here — confirm.
  MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
      PtrInfo, VPLoad->getMemOperand()->getFlags(),
      LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());

  SDValue Ret = DAG.getStridedLoadVP(
      LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
      VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());

  // Re-wire the old load's chain result to the new strided load's chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));

  return Ret;
}
19115
// Fold a vp.store whose stored value is a vp.reverse into a negatively-strided
// vp.strided.store, eliminating the explicit reverse.
19117 const RISCVSubtarget &Subtarget) {
19118 // Fold:
19119 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
19120 // -1, MASK)
19121 auto *VPStore = cast<VPStoreSDNode>(N);
19122
19123 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
19124 return SDValue();
19125
19126 SDValue VPReverse = VPStore->getValue();
19127 EVT ReverseVT = VPReverse->getValueType(0);
19128
19129 // We do not have a strided_store version for masks, and the evl of vp.reverse
19130 // and vp.store should always be the same.
19131 if (!ReverseVT.getVectorElementType().isByteSized() ||
19132 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
19133 !VPReverse.hasOneUse())
19134 return SDValue();
19135
19136 SDValue StoreMask = VPStore->getMask();
19137 // If Mask is all ones, then load is unmasked and can be reversed.
19138 if (!isOneOrOneSplat(StoreMask)) {
19139 // If the mask is not all ones, we can reverse the store if the mask was
19140 // also reversed by an unmasked vp.reverse with the same EVL.
19141 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
19142 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
19143 StoreMask.getOperand(2) != VPStore->getVectorLength())
19144 return SDValue();
// Reversing both the stored data and the mask cancels out.
19145 StoreMask = StoreMask.getOperand(0);
19146 }
19147
19148 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
19149 SDLoc DL(N);
19150 MVT XLenVT = Subtarget.getXLenVT();
19151 SDValue NumElem = VPStore->getVectorLength();
19152 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
19153
19154 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
19155 DAG.getConstant(1, DL, XLenVT));
19156 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
19157 DAG.getConstant(ElemWidthByte, DL, XLenVT));
19158 SDValue Base =
19159 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
// Negative stride writes backwards from the last-element address.
19160 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
19161
19163 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
19165 PtrInfo, VPStore->getMemOperand()->getFlags(),
19166 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
19167
// Store the reverse's *input* (operand 0); all other store attributes
// (offset, memory VT, addressing mode, trunc/compress flags) carry over.
19168 return DAG.getStridedStoreVP(
19169 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
19170 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
19171 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
19172 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
19173 }
19174
19175 // Peephole avgceil pattern.
19176 // %1 = zext <N x i8> %a to <N x i32>
19177 // %2 = zext <N x i8> %b to <N x i32>
19178 // %3 = add nuw nsw <N x i32> %1, splat (i32 1)
19179 // %4 = add nuw nsw <N x i32> %3, %2
19180 // %5 = lshr <N x i32> %4, splat (i32 1)
19181 // %6 = trunc <N x i32> %5 to <N x i8>
// i.e. (a + b + 1) >> 1 computed in a wider type, which is exactly the
// unsigned rounding-up average; selected as AVGCEILU_VL below.
19183 const RISCVSubtarget &Subtarget) {
19184 EVT VT = N->getValueType(0);
19185
19186 // Ignore fixed vectors.
19187 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19188 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
19189 return SDValue();
19190
// N's operands: 0 = truncated value, 1 = mask, 2 = EVL.
19191 SDValue In = N->getOperand(0);
19192 SDValue Mask = N->getOperand(1);
19193 SDValue VL = N->getOperand(2);
19194
19195 // Input should be a vp_srl with same mask and VL.
19196 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
19197 In.getOperand(3) != VL)
19198 return SDValue();
19199
19200 // Shift amount should be 1.
19201 if (!isOneOrOneSplat(In.getOperand(1)))
19202 return SDValue();
19203
19204 // Shifted value should be a vp_add with same mask and VL.
19205 SDValue LHS = In.getOperand(0);
19206 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
19207 LHS.getOperand(3) != VL)
19208 return SDValue();
19209
// Collects the three addends of the nested add chain.
19210 SDValue Operands[3];
19211
19212 // Matches another VP_ADD with same VL and Mask.
19213 auto FindAdd = [&](SDValue V, SDValue Other) {
19214 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
19215 V.getOperand(3) != VL)
19216 return false;
19217
19218 Operands[0] = Other;
19219 Operands[1] = V.getOperand(1);
19220 Operands[2] = V.getOperand(0);
19221 return true;
19222 };
19223
19224 // We need to find another VP_ADD in one of the operands.
19225 SDValue LHS0 = LHS.getOperand(0);
19226 SDValue LHS1 = LHS.getOperand(1);
19227 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
19228 return SDValue();
19229
19230 // Now we have three operands of two additions. Check that one of them is a
19231 // constant vector with ones.
19232 auto I = llvm::find_if(Operands,
19233 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
19234 if (I == std::end(Operands))
19235 return SDValue();
19236 // We found a vector with ones, move if it to the end of the Operands array.
// After the swap, Operands[0..1] are the two real addends and Operands[2]
// is the +1, which the VAADDU rounding mode supplies implicitly.
19237 std::swap(*I, Operands[2]);
19238
19239 // Make sure the other 2 operands can be promoted from the result type.
19240 for (SDValue Op : drop_end(Operands)) {
19241 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
19242 Op.getOperand(2) != VL)
19243 return SDValue();
19244 // Input must be the same size or smaller than our result.
19245 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
19246 return SDValue();
19247 }
19248
19249 // Pattern is detected.
19250 // Rebuild the zero extends in case the inputs are smaller than our result.
19251 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
19252 Operands[0].getOperand(0), Mask, VL);
19253 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
19254 Operands[1].getOperand(0), Mask, VL);
19255 // Build a AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
19256 // mode.
19257 SDLoc DL(N);
19258 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
19259 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
19260 }
19261
19262// Convert from one FMA opcode to another based on whether we are negating the
19263// multiply result and/or the accumulator.
19264// NOTE: Only supports RVV operations with VL.
19265static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
19266 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
19267 if (NegMul) {
19268 // clang-format off
19269 switch (Opcode) {
19270 default: llvm_unreachable("Unexpected opcode");
19271 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
19272 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
19273 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
19274 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
19275 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
19276 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
19277 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
19278 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
19279 }
19280 // clang-format on
19281 }
19282
19283 // Negating the accumulator changes ADD<->SUB.
19284 if (NegAcc) {
19285 // clang-format off
19286 switch (Opcode) {
19287 default: llvm_unreachable("Unexpected opcode");
19288 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
19289 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
19290 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
19291 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
19292 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
19293 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
19294 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
19295 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
19296 }
19297 // clang-format on
19298 }
19299
19300 return Opcode;
19301}
19302
19304 // Fold FNEG_VL into FMA opcodes.
19305 // The first operand of strict-fp is chain.
// Absorbing explicit lane-wise negations into the FMA opcode itself avoids
// the separate FNEG_VL instructions.
19306 bool IsStrict =
19307 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
19308 unsigned Offset = IsStrict ? 1 : 0;
// A and B are the multiplicands, C the accumulator.
19309 SDValue A = N->getOperand(0 + Offset);
19310 SDValue B = N->getOperand(1 + Offset);
19311 SDValue C = N->getOperand(2 + Offset);
19312 SDValue Mask = N->getOperand(3 + Offset);
19313 SDValue VL = N->getOperand(4 + Offset);
19314
// Peel an FNEG_VL only when its mask and VL match the FMA's, so the
// negation applies to exactly the same lanes the FMA operates on.
19315 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
19316 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
19317 V.getOperand(2) == VL) {
19318 // Return the negated input.
19319 V = V.getOperand(0);
19320 return true;
19321 }
19322
19323 return false;
19324 };
19325
19326 bool NegA = invertIfNegative(A);
19327 bool NegB = invertIfNegative(B);
19328 bool NegC = invertIfNegative(C);
19329
19330 // If no operands are negated, we're done.
19331 if (!NegA && !NegB && !NegC)
19332 return SDValue();
19333
// Negations of both multiplicands cancel, so the product sign flips only on
// NegA XOR NegB.
19334 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
19335 if (IsStrict)
19336 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
19337 {N->getOperand(0), A, B, C, Mask, VL});
19338 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
19339 VL);
19340 }
19341
// Combines for RVV FMA (VFMADD_VL and friends): first try folding FNEG_VL
// operands, then try widening to the VW* forms.
19344 const RISCVSubtarget &Subtarget) {
19345 SelectionDAG &DAG = DCI.DAG;
19346
19348 return V;
19349
19350 // FIXME: Ignore strict opcodes for now.
19351 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
19352 return SDValue();
19353
// Try to narrow the FMA operands into a widening (VW) operation.
19354 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
19355 }
19356
// Combines for scalar ISD::SRA on XLenVT: fold sra(sext_inreg(shl)) into an
// SLLI+SRAI pair, and fold (sra (shl X, 32), 32-C) — optionally through an
// add/sub — into (shl (sext_inreg X, i32), C).
19358 const RISCVSubtarget &Subtarget) {
19359 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
19360
19361 EVT VT = N->getValueType(0);
19362
19363 if (VT != Subtarget.getXLenVT())
19364 return SDValue();
19365
// All folds below need a constant shift amount.
19366 if (!isa<ConstantSDNode>(N->getOperand(1)))
19367 return SDValue();
19368 uint64_t ShAmt = N->getConstantOperandVal(1);
19369
19370 SDValue N0 = N->getOperand(0);
19371
19372 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
19373 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
19374 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
19375 unsigned ExtSize =
19376 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
19377 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
19378 N0.getOperand(0).hasOneUse() &&
19380 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
19381 if (LShAmt < ExtSize) {
// Widen both shift amounts by (XLen - ExtSize); the larger left shift
// subsumes the sign-extension.
19382 unsigned Size = VT.getSizeInBits();
19383 SDLoc ShlDL(N0.getOperand(0));
19384 SDValue Shl =
19385 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
19386 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
19387 SDLoc DL(N);
19388 return DAG.getNode(ISD::SRA, DL, VT, Shl,
19389 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
19390 }
19391 }
19392 }
19393
// The remaining folds are RV64-only and need the net shift 32 - ShAmt >= 0.
19394 if (ShAmt > 32 || VT != MVT::i64)
19395 return SDValue();
19396
19397 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
19398 // FIXME: Should this be a generic combine? There's a similar combine on X86.
19399 //
19400 // Also try these folds where an add or sub is in the middle.
19401 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
19402 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
19403 SDValue Shl;
19404 ConstantSDNode *AddC = nullptr;
19405
19406 // We might have an ADD or SUB between the SRA and SHL.
19407 bool IsAdd = N0.getOpcode() == ISD::ADD;
19408 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
19409 // Other operand needs to be a constant we can modify.
19410 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
19411 if (!AddC)
19412 return SDValue();
19413
19414 // AddC needs to have at least 32 trailing zeros.
// (So (AddC >> 32) << 32 == AddC, letting us move it below the shift.)
19415 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
19416 return SDValue();
19417
19418 // All users should be a shift by constant less than or equal to 32. This
19419 // ensures we'll do this optimization for each of them to produce an
19420 // add/sub+sext_inreg they can all share.
19421 for (SDNode *U : N0->users()) {
19422 if (U->getOpcode() != ISD::SRA ||
19423 !isa<ConstantSDNode>(U->getOperand(1)) ||
19424 U->getConstantOperandVal(1) > 32)
19425 return SDValue();
19426 }
19427
19428 Shl = N0.getOperand(IsAdd ? 0 : 1);
19429 } else {
19430 // Not an ADD or SUB.
19431 Shl = N0;
19432 }
19433
19434 // Look for a shift left by 32.
19435 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
19436 Shl.getConstantOperandVal(1) != 32)
19437 return SDValue();
19438
19439 // We if we didn't look through an add/sub, then the shl should have one use.
19440 // If we did look through an add/sub, the sext_inreg we create is free so
19441 // we're only creating 2 new instructions. It's enough to only remove the
19442 // original sra+add/sub.
19443 if (!AddC && !Shl.hasOneUse())
19444 return SDValue();
19445
19446 SDLoc DL(N);
19447 SDValue In = Shl.getOperand(0);
19448
19449 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
19450 // constant.
19451 if (AddC) {
19452 SDValue ShiftedAddC =
19453 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
19454 if (IsAdd)
19455 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
19456 else
19457 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
19458 }
19459
19460 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
19461 DAG.getValueType(MVT::i32));
// ShAmt == 32 means the residual left shift is zero — the sext suffices.
19462 if (ShAmt == 32)
19463 return SExt;
19464
19465 return DAG.getNode(
19466 ISD::SHL, DL, MVT::i64, SExt,
19467 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
19468 }
19469
19470// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
19471// the result is used as the condition of a br_cc or select_cc we can invert,
19472// inverting the setcc is free, and Z is 0/1. Caller will invert the
19473// br_cc/select_cc.
// This is De Morgan: !(A op B) == (!A op' B') — the caller supplies the outer
// inversion, so here we invert the setcc and strip the xor.
19475 bool IsAnd = Cond.getOpcode() == ISD::AND;
19476 if (!IsAnd && Cond.getOpcode() != ISD::OR)
19477 return SDValue();
19478
19479 if (!Cond.hasOneUse())
19480 return SDValue();
19481
19482 SDValue Setcc = Cond.getOperand(0);
19483 SDValue Xor = Cond.getOperand(1);
19484 // Canonicalize setcc to LHS.
19485 if (Setcc.getOpcode() != ISD::SETCC)
19486 std::swap(Setcc, Xor);
19487 // LHS should be a setcc and RHS should be an xor.
19488 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
19489 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
19490 return SDValue();
19491
19492 // If the condition is an And, SimplifyDemandedBits may have changed
19493 // (xor Z, 1) to (not Z).
19494 SDValue Xor1 = Xor.getOperand(1);
19495 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
19496 return SDValue();
19497
19498 EVT VT = Cond.getValueType();
19499 SDValue Xor0 = Xor.getOperand(0);
19500
19501 // The LHS of the xor needs to be 0/1.
// (Mask covers every bit above bit 0; Z being 0/1 makes (xor Z, 1) == !Z.)
19503 if (!DAG.MaskedValueIsZero(Xor0, Mask))
19504 return SDValue();
19505
19506 // We can only invert integer setccs.
19507 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
19508 if (!SetCCOpVT.isScalarInteger())
19509 return SDValue();
19510
19511 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
19512 if (ISD::isIntEqualitySetCC(CCVal)) {
// Equality setccs invert freely (eq <-> ne).
19513 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
19514 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
19515 Setcc.getOperand(1), CCVal);
19516 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
19517 // Invert (setlt 0, X) by converting to (setlt X, 1).
19518 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
19519 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
19520 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
19521 // (setlt X, 1) by converting to (setlt 0, X).
19522 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
19523 DAG.getConstant(0, SDLoc(Setcc), VT),
19524 Setcc.getOperand(0), CCVal);
19525 } else
19526 return SDValue();
19527
// Swap AND<->OR per De Morgan; Z is used directly since !(Z^1) == Z for 0/1.
19528 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
19529 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
19530 }
19531
19532// Perform common combines for BR_CC and SELECT_CC conditions.
// LHS/RHS/CC are in-out: on a `true` return they have been rewritten in place
// and the caller rebuilds the br_cc/select_cc from them.
19533static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
19534 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
19535 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
19536
19537 // As far as arithmetic right shift always saves the sign,
19538 // shift can be omitted.
19539 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
19540 // setge (sra X, N), 0 -> setge X, 0
19541 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
19542 LHS.getOpcode() == ISD::SRA) {
19543 LHS = LHS.getOperand(0);
19544 return true;
19545 }
19546
// Everything below only applies to eq/ne comparisons.
19547 if (!ISD::isIntEqualitySetCC(CCVal))
19548 return false;
19549
19550 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
19551 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
19552 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
19553 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
19554 // If we're looking for eq 0 instead of ne 0, we need to invert the
19555 // condition.
19556 bool Invert = CCVal == ISD::SETEQ;
19557 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
19558 if (Invert)
19559 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19560
19561 RHS = LHS.getOperand(1);
19562 LHS = LHS.getOperand(0);
19563 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
19564
19565 CC = DAG.getCondCode(CCVal);
19566 return true;
19567 }
19568
19569 // If XOR is reused and has an immediate that will fit in XORI,
19570 // do not fold.
19571 auto isXorImmediate = [](const SDValue &Op) -> bool {
19572 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
19573 return isInt<12>(XorCnst->getSExtValue());
19574 return false;
19575 };
19576 // Fold (X(i1) ^ 1) == 0 -> X != 0
19577 auto singleBitOp = [&DAG](const SDValue &VarOp,
19578 const SDValue &ConstOp) -> bool {
19579 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
19580 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
19581 return (XorCnst->getSExtValue() == 1) &&
19582 DAG.MaskedValueIsZero(VarOp, Mask);
19583 }
19584 return false;
19585 };
// If every user of the xor is a select_cc/br_cc, folding it into each
// user removes it entirely, so the XORI-reuse concern doesn't apply.
19586 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
19587 for (const SDNode *UserNode : Op->users()) {
19588 const unsigned Opcode = UserNode->getOpcode();
19589 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
19590 return false;
19591 }
19592 return true;
19593 };
19594 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
19595 const SDValue &LHS, const SDValue &RHS) -> bool {
19596 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
19597 (!isXorImmediate(LHS.getOperand(1)) ||
19598 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
19599 onlyUsedBySelectOrBR(LHS));
19600 };
19601 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
19602 if (isFoldableXorEq(LHS, RHS)) {
19603 RHS = LHS.getOperand(1);
19604 LHS = LHS.getOperand(0);
19605 return true;
19606 }
19607 // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext(X), C, eq/ne)
19608 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
19609 const SDValue LHS0 = LHS.getOperand(0);
19610 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
19611 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y)))
19612 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
19613 LHS0.getOperand(1), LHS.getOperand(1));
19614 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
19615 LHS0.getOperand(0), LHS.getOperand(1));
19616 return true;
19617 }
19618 }
19619
19620 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
19621 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
19622 LHS.getOperand(1).getOpcode() == ISD::Constant) {
19623 SDValue LHS0 = LHS.getOperand(0);
19624 if (LHS0.getOpcode() == ISD::AND &&
19625 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
19626 uint64_t Mask = LHS0.getConstantOperandVal(1);
19627 uint64_t ShAmt = LHS.getConstantOperandVal(1);
// Only a single-bit mask aligned with the shift amount tests one bit.
19628 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
19629 // XAndesPerf supports branch on test bit.
19630 if (Subtarget.hasVendorXAndesPerf()) {
19631 LHS =
19632 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
19633 DAG.getConstant(Mask, DL, LHS.getValueType()));
19634 return true;
19635 }
19636
// Move the tested bit to the sign position and branch on the sign.
19637 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
19638 CC = DAG.getCondCode(CCVal);
19639
19640 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
19641 LHS = LHS0.getOperand(0);
19642 if (ShAmt != 0)
19643 LHS =
19644 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
19645 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
19646 return true;
19647 }
19648 }
19649 }
19650
19651 // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
19652 // This can occur when legalizing some floating point comparisons.
19653 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
19654 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
19655 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19656 CC = DAG.getCondCode(CCVal);
19657 RHS = DAG.getConstant(0, DL, LHS.getValueType());
19658 return true;
19659 }
19660
19661 if (isNullConstant(RHS)) {
// De Morgan on the boolean condition; compensate by inverting the CC.
19662 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
19663 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19664 CC = DAG.getCondCode(CCVal);
19665 LHS = NewCond;
19666 return true;
19667 }
19668 }
19669
19670 return false;
19671}
19672
19673// Fold
19674// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
19675// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
19676// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
19677// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
19678// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
19679// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
// The select then chooses between X and the op's identity value, so the
// binop executes unconditionally. `Swapped` means the caller passed the
// arms in (FalseVal, TrueVal) order, i.e. the op is on the false arm.
19681 SDValue TrueVal, SDValue FalseVal,
19682 bool Swapped) {
19683 bool Commutative = true;
19684 unsigned Opc = TrueVal.getOpcode();
19685 switch (Opc) {
19686 default:
19687 return SDValue();
// Non-commutative ops: Y must be operand 0; 0 is the identity for the
// folded (second) operand of all of these.
19688 case ISD::SHL:
19689 case ISD::SRA:
19690 case ISD::SRL:
19691 case ISD::SUB:
19692 case ISD::ROTL:
19693 case ISD::ROTR:
19694 Commutative = false;
19695 break;
// Commutative ops: Y may be either operand.
19696 case ISD::ADD:
19697 case ISD::OR:
19698 case ISD::XOR:
19699 case ISD::UMIN:
19700 case ISD::UMAX:
19701 break;
19702 }
19703
19704 if (!TrueVal.hasOneUse())
19705 return SDValue();
19706
19707 unsigned OpToFold;
19708 if (FalseVal == TrueVal.getOperand(0))
19709 OpToFold = 0;
19710 else if (Commutative && FalseVal == TrueVal.getOperand(1))
19711 OpToFold = 1;
19712 else
19713 return SDValue();
19714
19715 EVT VT = N->getValueType(0);
19716 SDLoc DL(N);
19717 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
19718 EVT OtherOpVT = OtherOp.getValueType();
19719 SDValue IdentityOperand =
19720 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
// For the non-commutative cases above the folded operand is the shift/
// rotate amount or subtrahend, whose no-op value is 0.
19721 if (!Commutative)
19722 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
19723 assert(IdentityOperand && "No identity operand!");
19724
// If the arms were swapped, the identity goes on the true side instead.
19725 if (Swapped)
19726 std::swap(OtherOp, IdentityOperand);
19727 SDValue NewSel =
19728 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
19729 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
19730}
19731
19732// This tries to get rid of `select` and `icmp` that are being used to handle
19733// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
// Fold (select (X == 0), 0, cttz/ctlz X) -> (cttz/ctlz X) & (BitWidth - 1).
// For a power-of-two BitWidth, cttz/ctlz of 0 yields BitWidth, which the
// mask reduces to 0 — exactly the value the select supplied.
19735 SDValue Cond = N->getOperand(0);
19736
19737 // This represents either CTTZ or CTLZ instruction.
19738 SDValue CountZeroes;
19739
19740 SDValue ValOnZero;
19741
19742 if (Cond.getOpcode() != ISD::SETCC)
19743 return SDValue();
19744
19745 if (!isNullConstant(Cond->getOperand(1)))
19746 return SDValue();
19747
// Match both polarities: (X == 0) ? 0 : ctz and (X != 0) ? ctz : 0.
19748 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
19749 if (CCVal == ISD::CondCode::SETEQ) {
19750 CountZeroes = N->getOperand(2);
19751 ValOnZero = N->getOperand(1);
19752 } else if (CCVal == ISD::CondCode::SETNE) {
19753 CountZeroes = N->getOperand(1);
19754 ValOnZero = N->getOperand(2);
19755 } else {
19756 return SDValue();
19757 }
19758
// Look through a width change between the count and the select.
19759 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
19760 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
19761 CountZeroes = CountZeroes.getOperand(0);
19762
19763 if (CountZeroes.getOpcode() != ISD::CTTZ &&
19764 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
19765 CountZeroes.getOpcode() != ISD::CTLZ &&
19766 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
19767 return SDValue();
19768
19769 if (!isNullConstant(ValOnZero))
19770 return SDValue();
19771
// The compared value must be the same value being counted.
19772 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
19773 if (Cond->getOperand(0) != CountZeroesArgument)
19774 return SDValue();
19775
// The masking trick only works when BitWidth is a power of two.
19776 unsigned BitWidth = CountZeroes.getValueSizeInBits();
19777 if (!isPowerOf2_32(BitWidth))
19778 return SDValue();
19779
// The _ZERO_UNDEF forms are undefined at 0, so switch to the defined forms
// before removing the zero guard.
19780 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
19781 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
19782 CountZeroes.getValueType(), CountZeroesArgument);
19783 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
19784 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
19785 CountZeroes.getValueType(), CountZeroesArgument);
19786 }
19787
19788 SDValue BitWidthMinusOne =
19789 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
19790
19791 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
19792 CountZeroes, BitWidthMinusOne);
19793 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
19794}
19795
// Rewrite the condition of a select so the comparison can be lowered to a
// single-bit test (BEXT/BEXTI) when the tested mask bit is a power of two
// too large for a 12-bit immediate AND.
19797 const RISCVSubtarget &Subtarget) {
19798 SDValue Cond = N->getOperand(0);
19799 SDValue True = N->getOperand(1);
19800 SDValue False = N->getOperand(2);
19801 SDLoc DL(N);
19802 EVT VT = N->getValueType(0);
19803 EVT CondVT = Cond.getValueType();
19804
19805 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
19806 return SDValue();
19807
19808 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
19809 // BEXTI, where C is power of 2.
19810 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
19811 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
19812 SDValue LHS = Cond.getOperand(0);
19813 SDValue RHS = Cond.getOperand(1);
19814 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19815 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
19816 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
19817 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
// Small masks are handled fine by ANDI; only large single-bit masks win.
19818 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
// Invert the comparison to SETNE and swap the select arms to compensate.
19819 return DAG.getSelect(DL, VT,
19820 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
19821 False, True);
19822 }
19823 }
19824 return SDValue();
19825}
19826
19827static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
19828 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
19829 return false;
19830
19831 SwapCC = false;
19832 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
19833 std::swap(TrueVal, FalseVal);
19834 SwapCC = true;
19835 }
19836
19837 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
19838 return false;
19839
19840 SDValue A = FalseVal.getOperand(0);
19841 SDValue B = FalseVal.getOperand(1);
19842 // Add is commutative, so check both orders
19843 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
19844 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
19845}
19846
19847/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
19848/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
19849/// register pressure over the add followed by masked vsub sequence.
19851 SDLoc DL(N);
19852 EVT VT = N->getValueType(0);
19853 SDValue CC = N->getOperand(0);
19854 SDValue TrueVal = N->getOperand(1);
19855 SDValue FalseVal = N->getOperand(2);
19856
19857 bool SwapCC;
19858 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
19859 return SDValue();
19860
// After matching, the SUB arm (true arm when SwapCC) supplies a and b.
19861 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19862 SDValue A = Sub.getOperand(0);
19863 SDValue B = Sub.getOperand(1);
19864
19865 // Arrange the select such that we can match a masked
19866 // vrsub.vi to perform the conditional negate
19867 SDValue NegB = DAG.getNegative(B, DL, VT);
// -b must be chosen exactly when the original select picked the SUB arm;
// when the sub was on the false arm we invert CC so NegB lands there.
19868 if (!SwapCC)
19869 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19870 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19871 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19872}
19873
// Combines for ISD::SELECT: strip cttz/ctlz zero guards, prefer invertible
// setccs for BEXTI, and fold a binop arm into the select (both arm orders).
19875 const RISCVSubtarget &Subtarget) {
19876 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19877 return Folded;
19878
19879 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19880 return V;
19881
// With conditional-move fusion the select itself is cheap; leave it alone.
19882 if (Subtarget.hasConditionalMoveFusion())
19883 return SDValue();
19884
19885 SDValue TrueVal = N->getOperand(1);
19886 SDValue FalseVal = N->getOperand(2);
// Try the fold with the arms in both orders.
19887 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19888 return V;
19889 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19890}
19891
19892/// If we have a build_vector where each lane is binop X, C, where C
19893/// is a constant (but not necessarily the same constant on all lanes),
19894/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19895/// We assume that materializing a constant build vector will be no more
19896/// expensive that performing O(n) binops.
19900 SDLoc DL(N);
19901 EVT VT = N->getValueType(0);
19902
19903 assert(!VT.isScalableVector() && "unexpected build vector");
19904
// A single-element vector gains nothing from this transform.
19905 if (VT.getVectorNumElements() == 1)
19906 return SDValue();
19907
// Use lane 0's opcode as the candidate; every lane must match it below.
19908 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19909 if (!TLI.isBinOp(Opcode))
19910 return SDValue();
19911
19912 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19913 return SDValue();
19914
19915 // This BUILD_VECTOR involves an implicit truncation, and sinking
19916 // truncates through binops is non-trivial.
19917 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19918 return SDValue();
19919
19920 SmallVector<SDValue> LHSOps;
19921 SmallVector<SDValue> RHSOps;
19922 for (SDValue Op : N->ops()) {
19923 if (Op.isUndef()) {
19924 // We can't form a divide or remainder from undef.
19925 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19926 return SDValue();
19927
19928 LHSOps.push_back(Op);
19929 RHSOps.push_back(Op);
19930 continue;
19931 }
19932
19933 // TODO: We can handle operations which have an neutral rhs value
19934 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19935 // of profit in a more explicit manner.
19936 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19937 return SDValue();
19938
19939 LHSOps.push_back(Op.getOperand(0));
// Each lane's RHS must be a (possibly different) scalar constant.
19940 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19941 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19942 return SDValue();
19943 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19944 // have different LHS and RHS types.
19945 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19946 return SDValue();
19947
19948 RHSOps.push_back(Op.getOperand(1));
19949 }
19950
// One vector binop on the gathered lanes replaces N scalar binops.
19951 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19952 DAG.getBuildVector(VT, DL, RHSOps));
19953}
19954
// Map an i8-element vector operand type to the i32 accumulator type used by
// the vqdot* family: same total width, one quarter of the element count.
// NOTE(review): the signature line of this helper is missing from this
// extraction - verify the name/parameters against upstream.
19956 ElementCount OpEC = OpVT.getVectorElementCount();
19957 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19958 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19959}
19960
19961/// Given fixed length vectors A and B with equal element types, but possibly
19962/// different number of elements, return A + B where either A or B is zero
19963/// padded to the larger number of elements.
// NOTE(review): the signature line and the lines guarding the swap (the
// comparison of A's and B's element counts) are missing from this
// extraction - verify against upstream before relying on this text.
19965 SelectionDAG &DAG) {
19966 // NOTE: Manually doing the extract/add/insert scheme produces
19967 // significantly better codegen than the naive pad with zeros
19968 // and add scheme.
19969 EVT AVT = A.getValueType();
19970 EVT BVT = B.getValueType();
19973 std::swap(A, B);
19974 std::swap(AVT, BVT);
19975 }
19976
// Add A into the low AVT-sized part of B, then reinsert the sum, keeping
// B's (wider) upper elements intact.
19977 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19978 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19979 return DAG.getInsertSubvector(DL, B, Res, 0);
19980}
19981
// Try to rewrite the operand of a vector add-reduction into partial_reduce
// (vqdot-style) nodes: recurses through add/disjoint-or trees, and matches
// zext/sext and mul-of-extends of i8 vectors producing i32 accumulations.
// NOTE(review): several lines are missing from this extraction (the function
// signature, one condition at the first early-out, the IsSigned ternary, and
// the PARTIAL_REDUCE_* opcode assignments) - verify against upstream.
19983 SelectionDAG &DAG,
19984 const RISCVSubtarget &Subtarget,
19985 const RISCVTargetLowering &TLI) {
19986 using namespace SDPatternMatch;
19987 // Note: We intentionally do not check the legality of the reduction type.
19988 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19989 // intermediate types flow through here.
19990 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19992 return SDValue();
19993
19994 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19995 // form).
19996 SDValue A, B;
19997 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19998 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19999 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
20000 if (AOpt || BOpt) {
20001 if (AOpt)
20002 A = AOpt;
20003 if (BOpt)
20004 B = BOpt;
20005 // From here, we're doing A + B with mixed types, implicitly zero
20006 // padded to the wider type. Note that we *don't* need the result
20007 // type to be the original VT, and in fact prefer narrower ones
20008 // if possible.
20009 return getZeroPaddedAdd(DL, A, B, DAG);
20010 }
20011 }
20012
20013 // zext a <--> partial_reduce_umla 0, a, 1
20014 // sext a <--> partial_reduce_smla 0, a, 1
20015 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
20016 InVec.getOpcode() == ISD::SIGN_EXTEND) {
20017 SDValue A = InVec.getOperand(0);
20018 EVT OpVT = A.getValueType();
20019 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
20020 return SDValue();
20021
// A bare extend is a dot-product against a splat of 1.
20022 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
20023 SDValue B = DAG.getConstant(0x1, DL, OpVT);
20024 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
20025 unsigned Opc =
20027 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
20028 }
20029
20030 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
20031 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
20032 // mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b
20033 // mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped)
20034 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
20035 return SDValue();
20036
20037 if (!ISD::isExtOpcode(A.getOpcode()))
20038 return SDValue();
20039
20040 EVT OpVT = A.getOperand(0).getValueType();
20041 if (OpVT.getVectorElementType() != MVT::i8 ||
20042 OpVT != B.getOperand(0).getValueType() ||
20043 !TLI.isTypeLegal(A.getValueType()))
20044 return SDValue();
20045
// Select the partial-reduce opcode by the signedness of the two extends;
// the mixed zext*sext case canonicalizes by swapping A and B.
20046 unsigned Opc;
20047 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
20049 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
20050 B.getOpcode() == ISD::ZERO_EXTEND)
20052 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
20053 B.getOpcode() == ISD::ZERO_EXTEND)
20055 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
20056 B.getOpcode() == ISD::SIGN_EXTEND) {
20058 std::swap(A, B);
20059 } else
20060 return SDValue();
20061
20062 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
20063 return DAG.getNode(
20064 Opc, DL, ResVT,
20065 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
20066}
20067
// Wrapper combine for a vector add-reduction: if the Zvqdotq extension is
// available, rewrite the reduced operand via vqdot-style partial reductions
// and re-issue the VECREDUCE_ADD on the (possibly narrower) result.
// NOTE(review): the first line of this function's signature is missing from
// this extraction - verify the name and parameter list against upstream.
20069 const RISCVSubtarget &Subtarget,
20070 const RISCVTargetLowering &TLI) {
20071 if (!Subtarget.hasStdExtZvqdotq())
20072 return SDValue();
20073
20074 SDLoc DL(N);
20075 EVT VT = N->getValueType(0);
20076 SDValue InVec = N->getOperand(0);
20077 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
20078 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
20079 return SDValue();
20080}
20081
// Combine for INSERT_VECTOR_ELT on fixed vectors: sink the insert into the
// arms of a matching binop, or into the relevant source of a concat_vectors.
// NOTE(review): several lines are missing from this extraction (the function
// signature, one condition before the first early-out, and the getNode calls
// building LHS/RHS) - verify against upstream before relying on this text.
20083 const RISCVSubtarget &Subtarget,
20084 const RISCVTargetLowering &TLI) {
20085 SDValue InVec = N->getOperand(0);
20086 SDValue InVal = N->getOperand(1);
20087 SDValue EltNo = N->getOperand(2);
20088 SDLoc DL(N);
20089
20090 EVT VT = InVec.getValueType();
20091 if (VT.isScalableVector())
20092 return SDValue();
20093
20094 if (!InVec.hasOneUse())
20095 return SDValue();
20096
20097 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
20098 // move the insert_vector_elts into the arms of the binop. Note that
20099 // the new RHS must be a constant.
20100 const unsigned InVecOpcode = InVec->getOpcode();
20101 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
20102 InVal.hasOneUse()) {
20103 SDValue InVecLHS = InVec->getOperand(0);
20104 SDValue InVecRHS = InVec->getOperand(1);
20105 SDValue InValLHS = InVal->getOperand(0);
20106 SDValue InValRHS = InVal->getOperand(1);
20107
20109 return SDValue();
20110 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
20111 return SDValue();
20112 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
20113 // have different LHS and RHS types.
20114 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
20115 return SDValue();
20117 InVecLHS, InValLHS, EltNo);
20119 InVecRHS, InValRHS, EltNo);
20120 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
20121 }
20122
20123 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
20124 // move the insert_vector_elt to the source operand of the concat_vector.
20125 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
20126 return SDValue();
20127
20128 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
20129 if (!IndexC)
20130 return SDValue();
20131 unsigned Elt = IndexC->getZExtValue();
20132
20133 EVT ConcatVT = InVec.getOperand(0).getValueType();
20134 if (ConcatVT.getVectorElementType() != InVal.getValueType())
20135 return SDValue();
// Split the flat element index into (which concat operand, index within it).
20136 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
20137 unsigned NewIdx = Elt % ConcatNumElts;
20138
20139 unsigned ConcatOpIdx = Elt / ConcatNumElts;
20140 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
20141 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
20142
20143 SmallVector<SDValue> ConcatOps(InVec->ops());
20144 ConcatOps[ConcatOpIdx] = ConcatOp;
20145 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20146}
20147
20148// If we're concatenating a series of vector loads like
20149// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
20150// Then we can turn this into a strided load by widening the vector elements
20151// vlse32 p, stride=n
// NOTE(review): a few lines are missing from this extraction (the function
// signature, the declaration of the Lds vector, the unknown-MemSize
// assignment, and the getMachineMemOperand call) - verify against upstream.
20153 const RISCVSubtarget &Subtarget,
20154 const RISCVTargetLowering &TLI) {
20155 SDLoc DL(N);
20156 EVT VT = N->getValueType(0);
20157
20158 // Only perform this combine on legal MVTs.
20159 if (!TLI.isTypeLegal(VT))
20160 return SDValue();
20161
20162 // TODO: Potentially extend this to scalable vectors
20163 if (VT.isScalableVector())
20164 return SDValue();
20165
20166 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
20167 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
20168 !SDValue(BaseLd, 0).hasOneUse())
20169 return SDValue();
20170
20171 EVT BaseLdVT = BaseLd->getValueType(0);
20172
20173 // Go through the loads and check that they're strided
20175 Lds.push_back(BaseLd);
20176 Align Align = BaseLd->getAlign();
20177 for (SDValue Op : N->ops().drop_front()) {
20178 auto *Ld = dyn_cast<LoadSDNode>(Op);
20179 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
20180 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
20181 Ld->getValueType(0) != BaseLdVT)
20182 return SDValue();
20183
20184 Lds.push_back(Ld);
20185
20186 // The common alignment is the most restrictive (smallest) of all the loads
20187 Align = std::min(Align, Ld->getAlign());
20188 }
20189
// A pointer difference is either a constant byte distance or an SDValue
// stride; the bool records whether the stride must be negated.
20190 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
20191 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
20192 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
20193 // If the load ptrs can be decomposed into a common (Base + Index) with a
20194 // common constant stride, then return the constant stride.
20195 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
20196 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
20197 if (BIO1.equalBaseIndex(BIO2, DAG))
20198 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
20199
20200 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
20201 SDValue P1 = Ld1->getBasePtr();
20202 SDValue P2 = Ld2->getBasePtr();
20203 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
20204 return {{P2.getOperand(1), false}};
20205 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
20206 return {{P1.getOperand(1), true}};
20207
20208 return std::nullopt;
20209 };
20210
20211 // Get the distance between the first and second loads
20212 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
20213 if (!BaseDiff)
20214 return SDValue();
20215
20216 // Check all the loads are the same distance apart
20217 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
20218 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
20219 return SDValue();
20220
20221 // TODO: At this point, we've successfully matched a generalized gather
20222 // load. Maybe we should emit that, and then move the specialized
20223 // matchers above and below into a DAG combine?
20224
20225 // Get the widened scalar type, e.g. v4i8 -> i64
20226 unsigned WideScalarBitWidth =
20227 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
20228 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
20229
20230 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
20231 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
20232 if (!TLI.isTypeLegal(WideVecVT))
20233 return SDValue();
20234
20235 // Check that the operation is legal
20236 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
20237 return SDValue();
20238
20239 auto [StrideVariant, MustNegateStride] = *BaseDiff;
20240 SDValue Stride =
20241 std::holds_alternative<SDValue>(StrideVariant)
20242 ? std::get<SDValue>(StrideVariant)
20243 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
20244 Lds[0]->getOffset().getValueType());
20245 if (MustNegateStride)
20246 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
20247
20248 SDValue AllOneMask =
20249 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
20250 DAG.getConstant(1, DL, MVT::i1));
20251
20252 uint64_t MemSize;
20253 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
20254 ConstStride && ConstStride->getSExtValue() >= 0)
20255 // total size = (elsize * n) + (stride - elsize) * (n-1)
20256 // = elsize + stride * (n-1)
20257 MemSize = WideScalarVT.getSizeInBits() +
20258 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
20259 else
20260 // If Stride isn't constant, then we can't know how much it will load
20262
20264 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
20265 Align);
20266
20267 SDValue StridedLoad = DAG.getStridedLoadVP(
20268 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
20269 AllOneMask,
20270 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
20271
// Preserve the memory ordering of the original loads relative to the new
// strided load so chains remain consistent.
20272 for (SDValue Ld : N->ops())
20273 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
20274
20275 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
20276}
20277
// Combine for VECTOR_SHUFFLE: recognize a select-like shuffle of add/sub as
// a conditional-negate add (matching masked vrsub.vi), and custom-legalize
// shuffles of over-wide integer elements by halving the element size.
// NOTE(review): the first line of this function's signature is missing from
// this extraction - verify the parameter list against upstream.
20279 const RISCVSubtarget &Subtarget,
20280 const RISCVTargetLowering &TLI) {
20281 SDLoc DL(N);
20282 EVT VT = N->getValueType(0);
20283 const unsigned ElementSize = VT.getScalarSizeInBits();
20284 const unsigned NumElts = VT.getVectorNumElements();
20285 SDValue V1 = N->getOperand(0);
20286 SDValue V2 = N->getOperand(1);
20287 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
20288 MVT XLenVT = Subtarget.getXLenVT();
20289
20290 // Recognized a disguised select of add/sub.
20291 bool SwapCC;
20292 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
20293 matchSelectAddSub(V1, V2, SwapCC)) {
20294 SDValue Sub = SwapCC ? V1 : V2;
20295 SDValue A = Sub.getOperand(0);
20296 SDValue B = Sub.getOperand(1);
20297
// Convert the shuffle's lane-select mask into an i1 build_vector condition.
20298 SmallVector<SDValue> MaskVals;
20299 for (int MaskIndex : Mask) {
20300 bool SelectMaskVal = (MaskIndex < (int)NumElts);
20301 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
20302 }
20303 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
20304 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
20305 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
20306
20307 // Arrange the select such that we can match a masked
20308 // vrsub.vi to perform the conditional negate
20309 SDValue NegB = DAG.getNegative(B, DL, VT);
20310 if (!SwapCC)
20311 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
20312 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
20313 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
20314 }
20315
20316 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
20317 // during the combine phase before type legalization, and relies on
20318 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
20319 // for the source mask.
20320 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
20321 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
20322 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
20323 return SDValue();
20324
20325 SmallVector<int, 8> NewMask;
20326 narrowShuffleMaskElts(2, Mask, NewMask);
20327
// Reissue the shuffle with half-width elements and twice the lane count.
20328 LLVMContext &C = *DAG.getContext();
20329 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
20330 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
20331 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
20332 DAG.getBitcast(NewVT, V2), NewMask);
20333 return DAG.getBitcast(VT, Res);
20334}
20335
// Fold (add x, (vwmul*_vl a, b)) into the corresponding widening
// multiply-accumulate node, provided passthrus are undef and the add and
// multiply agree on mask and VL.
// NOTE(review): the first line of this function's signature is missing from
// this extraction - verify the parameter list against upstream.
20337 const RISCVSubtarget &Subtarget) {
20338 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
20339
20340 if (N->getValueType(0).isFixedLengthVector())
20341 return SDValue();
20342
20343 SDValue Addend = N->getOperand(0);
20344 SDValue MulOp = N->getOperand(1);
20345
20346 if (N->getOpcode() == RISCVISD::ADD_VL) {
20347 SDValue AddPassthruOp = N->getOperand(2);
20348 if (!AddPassthruOp.isUndef())
20349 return SDValue();
20350 }
20351
20352 auto IsVWMulOpc = [](unsigned Opc) {
20353 switch (Opc) {
20354 case RISCVISD::VWMUL_VL:
20355 case RISCVISD::VWMULU_VL:
20356 case RISCVISD::VWMULSU_VL:
20357 return true;
20358 default:
20359 return false;
20360 }
20361 };
20362
// The widening multiply may be either add operand; canonicalize it into
// MulOp before bailing out.
20363 if (!IsVWMulOpc(MulOp.getOpcode()))
20364 std::swap(Addend, MulOp);
20365
20366 if (!IsVWMulOpc(MulOp.getOpcode()))
20367 return SDValue();
20368
20369 SDValue MulPassthruOp = MulOp.getOperand(2);
20370
20371 if (!MulPassthruOp.isUndef())
20372 return SDValue();
20373
// For a plain ISD::ADD, synthesize the default scalable mask/VL; for
// ADD_VL, take them from the node's operands.
20374 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
20375 const RISCVSubtarget &Subtarget) {
20376 if (N->getOpcode() == ISD::ADD) {
20377 SDLoc DL(N);
20378 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
20379 Subtarget);
20380 }
20381 return std::make_pair(N->getOperand(3), N->getOperand(4));
20382 }(N, DAG, Subtarget);
20383
20384 SDValue MulMask = MulOp.getOperand(3);
20385 SDValue MulVL = MulOp.getOperand(4);
20386
20387 if (AddMask != MulMask || AddVL != MulVL)
20388 return SDValue();
20389
20390 const auto &TSInfo =
20391 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
20392 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
20393
20394 SDLoc DL(N);
20395 EVT VT = N->getValueType(0);
20396 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
20397 AddVL};
20398 return DAG.getNode(Opc, DL, VT, Ops);
20399}
20400
// Fold (add x, (vqdot*_vl a, b, accum, mask, vl)) by merging x into the
// vqdot's accumulator operand, when the add's mask is all-ones (VMSET) and
// the VLs agree.
// NOTE(review): the first line of this function's signature is missing from
// this extraction - verify the parameter list against upstream.
20402 const RISCVSubtarget &Subtarget) {
20403
20404 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
20405
20406 if (!N->getValueType(0).isVector())
20407 return SDValue();
20408
20409 SDValue Addend = N->getOperand(0);
20410 SDValue DotOp = N->getOperand(1);
20411
20412 if (N->getOpcode() == RISCVISD::ADD_VL) {
20413 SDValue AddPassthruOp = N->getOperand(2);
20414 if (!AddPassthruOp.isUndef())
20415 return SDValue();
20416 }
20417
20418 auto IsVqdotqOpc = [](unsigned Opc) {
20419 switch (Opc) {
20420 case RISCVISD::VQDOT_VL:
20421 case RISCVISD::VQDOTU_VL:
20422 case RISCVISD::VQDOTSU_VL:
20423 return true;
20424 default:
20425 return false;
20426 }
20427 };
20428
// The vqdot may be either add operand; canonicalize it into DotOp.
20429 if (!IsVqdotqOpc(DotOp.getOpcode()))
20430 std::swap(Addend, DotOp);
20431
20432 if (!IsVqdotqOpc(DotOp.getOpcode()))
20433 return SDValue();
20434
20435 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
20436 const RISCVSubtarget &Subtarget) {
20437 if (N->getOpcode() == ISD::ADD) {
20438 SDLoc DL(N);
20439 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
20440 Subtarget);
20441 }
20442 return std::make_pair(N->getOperand(3), N->getOperand(4));
20443 }(N, DAG, Subtarget);
20444
20445 SDValue MulVL = DotOp.getOperand(4);
20446 if (AddVL != MulVL)
20447 return SDValue();
20448
20449 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
20450 AddMask.getOperand(0) != MulVL)
20451 return SDValue();
20452
// Pre-add the external addend into the existing accumulator, then rebuild
// the vqdot with the combined accumulator.
20453 SDValue AccumOp = DotOp.getOperand(2);
20454 SDLoc DL(N);
20455 EVT VT = N->getValueType(0);
20456 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
20457 DAG.getUNDEF(VT), AddMask, AddVL);
20458
20459 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
20460 DotOp.getOperand(3), DotOp->getOperand(4)};
20461 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
20462}
20463
// Pre-legalization fixup for gather/scatter index operands: promote narrow
// signed indices to XLenVT and switch the addressing mode to the
// "unsigned unscaled" form RISC-V supports. Returns true if changed.
// NOTE(review): two lines of this function's signature (the name and the
// DAGCombinerInfo parameter) are missing from this extraction - verify
// against upstream.
20464static bool
20466 ISD::MemIndexType &IndexType,
20468 if (!DCI.isBeforeLegalize())
20469 return false;
20470
20471 SelectionDAG &DAG = DCI.DAG;
20472 const MVT XLenVT =
20473 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
20474
20475 const EVT IndexVT = Index.getValueType();
20476
20477 // RISC-V indexed loads only support the "unsigned unscaled" addressing
20478 // mode, so anything else must be manually legalized.
20479 if (!isIndexTypeSigned(IndexType))
20480 return false;
20481
20482 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
20483 // Any index legalization should first promote to XLenVT, so we don't lose
20484 // bits when scaling. This may create an illegal index type so we let
20485 // LLVM's legalization take care of the splitting.
20486 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
20487 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
20488 EVT::getVectorVT(*DAG.getContext(), XLenVT,
20489 IndexVT.getVectorElementCount()),
20490 Index);
20491 }
20492 IndexType = ISD::UNSIGNED_SCALED;
20493 return true;
20494}
20495
20496/// Match the index vector of a scatter or gather node as the shuffle mask
20497/// which performs the rearrangement if possible. Will only match if
20498/// all lanes are touched, and thus replacing the scatter or gather with
20499/// a unit strided access and shuffle is legal.
20500static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
20501 SmallVector<int> &ShuffleMask) {
20502 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
20503 return false;
20504 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
20505 return false;
20506
20507 const unsigned ElementSize = VT.getScalarStoreSize();
20508 const unsigned NumElems = VT.getVectorNumElements();
20509
20510 // Create the shuffle mask and check all bits active
20511 assert(ShuffleMask.empty());
20512 BitVector ActiveLanes(NumElems);
20513 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
20514 // TODO: We've found an active bit of UB, and could be
20515 // more aggressive here if desired.
20516 if (Index->getOperand(i)->isUndef())
20517 return false;
20518 uint64_t C = Index->getConstantOperandVal(i);
20519 if (C % ElementSize != 0)
20520 return false;
20521 C = C / ElementSize;
20522 if (C >= NumElems)
20523 return false;
20524 ShuffleMask.push_back(C);
20525 ActiveLanes.set(C);
20526 }
20527 return ActiveLanes.all();
20528}
20529
20530/// Match the index of a gather or scatter operation as an operation
20531/// with twice the element width and half the number of elements. This is
20532/// generally profitable (if legal) because these operations are linear
20533/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
20534/// come out ahead.
20535static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
20536 Align BaseAlign, const RISCVSubtarget &ST) {
20537 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
20538 return false;
20539 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
20540 return false;
20541
20542 // Attempt a doubling. If we can use a element type 4x or 8x in
20543 // size, this will happen via multiply iterations of the transform.
20544 const unsigned NumElems = VT.getVectorNumElements();
20545 if (NumElems % 2 != 0)
20546 return false;
20547
20548 const unsigned ElementSize = VT.getScalarStoreSize();
20549 const unsigned WiderElementSize = ElementSize * 2;
20550 if (WiderElementSize > ST.getELen()/8)
20551 return false;
20552
20553 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
20554 return false;
20555
20556 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
20557 // TODO: We've found an active bit of UB, and could be
20558 // more aggressive here if desired.
20559 if (Index->getOperand(i)->isUndef())
20560 return false;
20561 // TODO: This offset check is too strict if we support fully
20562 // misaligned memory operations.
20563 uint64_t C = Index->getConstantOperandVal(i);
20564 if (i % 2 == 0) {
20565 if (C % WiderElementSize != 0)
20566 return false;
20567 continue;
20568 }
20569 uint64_t Last = Index->getConstantOperandVal(i-1);
20570 if (C != Last + ElementSize)
20571 return false;
20572 }
20573 return true;
20574}
20575
20576// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
20577// This would be benefit for the cases where X and Y are both the same value
20578// type of low precision vectors. Since the truncate would be lowered into
20579// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
20580// restriction, such pattern would be expanded into a series of "vsetvli"
20581// and "vnsrl" instructions later to reach this point.
// Implements the trunc (sra sext(X), zext(Y)) -> sra (X, smin(Y, sz-1))
// combine described above; only fires for VLMAX operation with an all-ones
// (VMSET) mask.
// NOTE(review): the first line of this function's signature is missing from
// this extraction - verify the parameter list against upstream.
20583 SDValue Mask = N->getOperand(1);
20584 SDValue VL = N->getOperand(2);
20585
// VL is "max" either as an all-ones immediate or as the X0 register.
20586 bool IsVLMAX = isAllOnesConstant(VL) ||
20587 (isa<RegisterSDNode>(VL) &&
20588 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
20589 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
20590 Mask.getOperand(0) != VL)
20591 return SDValue();
20592
20593 auto IsTruncNode = [&](SDValue V) {
20594 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20595 V.getOperand(1) == Mask && V.getOperand(2) == VL;
20596 };
20597
20598 SDValue Op = N->getOperand(0);
20599
20600 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
20601 // to distinguish such pattern.
20602 while (IsTruncNode(Op)) {
20603 if (!Op.hasOneUse())
20604 return SDValue();
20605 Op = Op.getOperand(0);
20606 }
20607
20608 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
20609 return SDValue();
20610
20611 SDValue N0 = Op.getOperand(0);
20612 SDValue N1 = Op.getOperand(1);
20613 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
20614 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
20615 return SDValue();
20616
// Both pre-extension values must share the narrow result type.
20617 SDValue N00 = N0.getOperand(0);
20618 SDValue N10 = N1.getOperand(0);
20619 if (!N00.getValueType().isVector() ||
20620 N00.getValueType() != N10.getValueType() ||
20621 N->getValueType(0) != N10.getValueType())
20622 return SDValue();
20623
// Clamp the shift amount so the narrow sra has defined behavior.
20624 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
20625 SDValue SMin =
20626 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
20627 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
20628 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
20629}
20630
20631// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
20632// maximum value for the truncated type.
20633// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
20634// is the signed maximum value for the truncated type and C2 is the signed
20635// minimum value.
// Implements the saturating-truncate combines described above: detect
// umin/smin/smax clamp patterns feeding a TRUNCATE_VECTOR_VL and replace
// them with TRUNCATE_VECTOR_VL_USAT/SSAT (vnclipu/vnclip) chains.
// NOTE(review): the first line of this function's signature is missing from
// this extraction - verify the parameter list against upstream.
20637 const RISCVSubtarget &Subtarget) {
20638 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
20639
20640 MVT VT = N->getSimpleValueType(0);
20641
20642 SDValue Mask = N->getOperand(1);
20643 SDValue VL = N->getOperand(2);
20644
// Match either a generic min/max or its VL form (with matching mask/VL),
// returning the non-constant operand and capturing the splat constant.
20645 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
20646 APInt &SplatVal) {
20647 if (V.getOpcode() != Opc &&
20648 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
20649 V.getOperand(3) == Mask && V.getOperand(4) == VL))
20650 return SDValue();
20651
20652 SDValue Op = V.getOperand(1);
20653
20654 // Peek through conversion between fixed and scalable vectors.
20655 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
20656 isNullConstant(Op.getOperand(2)) &&
20657 Op.getOperand(1).getValueType().isFixedLengthVector() &&
20658 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20659 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
20660 isNullConstant(Op.getOperand(1).getOperand(1)))
20661 Op = Op.getOperand(1).getOperand(0);
20662
20663 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
20664 return V.getOperand(0);
20665
// Also accept a VMV_V_X_VL splat of a scalar constant with matching VL.
20666 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
20667 Op.getOperand(2) == VL) {
20668 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
20669 SplatVal =
20670 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
20671 return V.getOperand(0);
20672 }
20673 }
20674
20675 return SDValue();
20676 };
20677
20678 SDLoc DL(N);
20679
20680 auto DetectUSatPattern = [&](SDValue V) {
20681 APInt LoC, HiC;
20682
20683 // Simple case, V is a UMIN.
20684 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
20685 if (HiC.isMask(VT.getScalarSizeInBits()))
20686 return UMinOp;
20687
20688 // If we have an SMAX that removes negative numbers first, then we can match
20689 // SMIN instead of UMIN.
20690 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20691 if (SDValue SMaxOp =
20692 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20693 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
20694 return SMinOp;
20695
20696 // If we have an SMIN before an SMAX and the SMAX constant is less than or
20697 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
20698 // first.
20699 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20700 if (SDValue SMinOp =
20701 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20702 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
20703 HiC.uge(LoC))
20704 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
20705 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
20706 Mask, VL);
20707
20708 return SDValue();
20709 };
20710
20711 auto DetectSSatPattern = [&](SDValue V) {
20712 unsigned NumDstBits = VT.getScalarSizeInBits();
20713 unsigned NumSrcBits = V.getScalarValueSizeInBits();
20714 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
20715 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
20716
// Accept the clamp in either order: smin-of-smax or smax-of-smin.
20717 APInt HiC, LoC;
20718 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20719 if (SDValue SMaxOp =
20720 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20721 if (HiC == SignedMax && LoC == SignedMin)
20722 return SMaxOp;
20723
20724 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20725 if (SDValue SMinOp =
20726 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20727 if (HiC == SignedMax && LoC == SignedMin)
20728 return SMinOp;
20729
20730 return SDValue();
20731 };
20732
20733 SDValue Src = N->getOperand(0);
20734
20735 // Look through multiple layers of truncates.
20736 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20737 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
20738 Src.hasOneUse())
20739 Src = Src.getOperand(0);
20740
20741 SDValue Val;
20742 unsigned ClipOpc;
20743 if ((Val = DetectUSatPattern(Src)))
20744 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
20745 else if ((Val = DetectSSatPattern(Src)))
20746 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
20747 else
20748 return SDValue();
20749
20750 MVT ValVT = Val.getSimpleValueType();
20751
// Emit one saturating truncate per halving step until reaching VT (the
// hardware only narrows SEW*2 -> SEW at a time).
20752 do {
20753 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
20754 ValVT = ValVT.changeVectorElementType(ValEltVT);
20755 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
20756 } while (ValVT != VT);
20757
20758 return Val;
20759}
20760
20761// Convert
20762// (iX ctpop (bitcast (vXi1 A)))
20763// ->
20764// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20765// and
20766// (iN reduce.add (zext (vXi1 A to vXiN))
20767// ->
20768// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20769// FIXME: It's complicated to match all the variations of this after type
20770// legalization so we only handle the pre-type legalization pattern, but that
20771// requires the fixed vector type to be legal.
// Implements the ctpop/vecreduce.add-of-i1 -> vcpop.m combines described
// above.
// NOTE(review): a few lines are missing from this extraction (the function
// signature, part of the opcode assertion, and the computeVLMAX branch of
// the MaxVLMAX ternary) - verify against upstream.
20773 const RISCVSubtarget &Subtarget) {
20774 unsigned Opc = N->getOpcode();
20776 "Unexpected opcode");
20777 EVT VT = N->getValueType(0);
20778 if (!VT.isScalarInteger())
20779 return SDValue();
20780
20781 SDValue Src = N->getOperand(0);
20782
20783 if (Opc == ISD::CTPOP) {
20784 // Peek through zero_extend. It doesn't change the count.
20785 if (Src.getOpcode() == ISD::ZERO_EXTEND)
20786 Src = Src.getOperand(0);
20787
// For ctpop, the popcount source must be a scalar bitcast of a vector.
20788 if (Src.getOpcode() != ISD::BITCAST)
20789 return SDValue();
20790 Src = Src.getOperand(0);
20791 } else if (Opc == ISD::VECREDUCE_ADD) {
20792 if (Src.getOpcode() != ISD::ZERO_EXTEND)
20793 return SDValue();
20794 Src = Src.getOperand(0);
20795 }
20796
20797 EVT SrcEVT = Src.getValueType();
20798 if (!SrcEVT.isSimple())
20799 return SDValue();
20800
20801 MVT SrcMVT = SrcEVT.getSimpleVT();
20802 // Make sure the input is an i1 vector.
20803 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
20804 return SDValue();
20805
20806 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20807 if (!TLI.isTypeLegal(SrcMVT))
20808 return SDValue();
20809
20810 // Check that destination type is large enough to hold result without
20811 // overflow.
20812 if (Opc == ISD::VECREDUCE_ADD) {
20813 unsigned EltSize = SrcMVT.getScalarSizeInBits();
20814 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
20815 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
20816 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
20817 ? SrcMVT.getVectorNumElements()
20819 VectorBitsMax, EltSize, MinSize);
20820 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
20821 return SDValue();
20822 }
20823
// Fixed-length sources are wrapped into their scalable container type.
20824 MVT ContainerVT = SrcMVT;
20825 if (SrcMVT.isFixedLengthVector()) {
20826 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
20827 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
20828 }
20829
20830 SDLoc DL(N);
20831 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
20832
20833 MVT XLenVT = Subtarget.getXLenVT();
20834 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
20835 return DAG.getZExtOrTrunc(Pop, DL, VT);
20836}
20837
20840 const RISCVSubtarget &Subtarget) {
// SHL / SHL_VL combine: try to rewrite a shift of an extended value as an
// RVV widening operation.
20841 // (shl (zext x), y) -> (vwsll x, y)
20842 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20843 return V;
20844
// Otherwise fold a constant shift of an extend into a widening multiply
// by the corresponding power of two:
20845 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
20846 // (shl (zext x), C) -> (vwmulu x, 1u << C)
20847
20848 if (!DCI.isAfterLegalizeDAG())
20849 return SDValue();
20850
// The extend must have a single use; otherwise it stays live anyway and
// the widening multiply saves nothing.
20851 SDValue LHS = N->getOperand(0);
20852 if (!LHS.hasOneUse())
20853 return SDValue();
// Select the widening multiply matching the extension's signedness:
// vwmulsu (signed x unsigned constant) for sign extends, vwmulu for zero
// extends.
20854 unsigned Opcode;
20855 switch (LHS.getOpcode()) {
20856 case ISD::SIGN_EXTEND:
20857 case RISCVISD::VSEXT_VL:
20858 Opcode = RISCVISD::VWMULSU_VL;
20859 break;
20860 case ISD::ZERO_EXTEND:
20861 case RISCVISD::VZEXT_VL:
20862 Opcode = RISCVISD::VWMULU_VL;
20863 break;
20864 default:
20865 return SDValue();
20866 }
20867
// The shift amount must be a known constant: either a constant splat
// vector or a vmv.v.x of a constant scalar.
20868 SDValue RHS = N->getOperand(1);
20869 APInt ShAmt;
20870 uint64_t ShAmtInt;
20871 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20872 ShAmtInt = ShAmt.getZExtValue();
20873 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20874 RHS.getOperand(1).getOpcode() == ISD::Constant)
20875 ShAmtInt = RHS.getConstantOperandVal(1);
20876 else
20877 return SDValue();
20878
20879 // Better foldings:
20880 // (shl (sext x), 1) -> (vwadd x, x)
20881 // (shl (zext x), 1) -> (vwaddu x, x)
20882 if (ShAmtInt <= 1)
20883 return SDValue();
20884
20885 SDValue NarrowOp = LHS.getOperand(0);
20886 MVT NarrowVT = NarrowOp.getSimpleValueType();
20887 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
// The multiplier 1 << C must be representable in the narrow element type.
20888 if (ShAmtInt >= NarrowBits)
20889 return SDValue();
// Only a doubling extension (SEW -> 2*SEW) matches the widening multiply.
20890 MVT VT = N->getSimpleValueType(0);
20891 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20892 return SDValue();
20893
20894 SelectionDAG &DAG = DCI.DAG;
20895 SDLoc DL(N);
// Reuse the passthru/mask/VL operands of a SHL_VL node; synthesize the
// defaults for a plain SHL.
20896 SDValue Passthru, Mask, VL;
20897 switch (N->getOpcode()) {
20898 case ISD::SHL:
20899 Passthru = DAG.getUNDEF(VT);
20900 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20901 break;
20902 case RISCVISD::SHL_VL:
20903 Passthru = N->getOperand(2);
20904 Mask = N->getOperand(3);
20905 VL = N->getOperand(4);
20906 break;
20907 default:
20908 llvm_unreachable("Expected SHL");
20909 }
20910 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20911 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20912 Passthru, Mask, VL);
20913}
20914
20916 DAGCombinerInfo &DCI) const {
20917 SelectionDAG &DAG = DCI.DAG;
20918 const MVT XLenVT = Subtarget.getXLenVT();
20919 SDLoc DL(N);
20920
20921 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20922 // bits are demanded. N will be added to the Worklist if it was not deleted.
20923 // Caller should return SDValue(N, 0) if this returns true.
20924 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20925 SDValue Op = N->getOperand(OpNo);
20926 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20927 if (!SimplifyDemandedBits(Op, Mask, DCI))
20928 return false;
20929
20930 if (N->getOpcode() != ISD::DELETED_NODE)
20931 DCI.AddToWorklist(N);
20932 return true;
20933 };
20934
20935 switch (N->getOpcode()) {
20936 default:
20937 break;
20938 case RISCVISD::SplitF64: {
20939 SDValue Op0 = N->getOperand(0);
20940 // If the input to SplitF64 is just BuildPairF64 then the operation is
20941 // redundant. Instead, use BuildPairF64's operands directly.
20942 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20943 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20944
20945 if (Op0->isUndef()) {
20946 SDValue Lo = DAG.getUNDEF(MVT::i32);
20947 SDValue Hi = DAG.getUNDEF(MVT::i32);
20948 return DCI.CombineTo(N, Lo, Hi);
20949 }
20950
20951 // It's cheaper to materialise two 32-bit integers than to load a double
20952 // from the constant pool and transfer it to integer registers through the
20953 // stack.
20955 APInt V = C->getValueAPF().bitcastToAPInt();
20956 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20957 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20958 return DCI.CombineTo(N, Lo, Hi);
20959 }
20960
20961 // This is a target-specific version of a DAGCombine performed in
20962 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20963 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20964 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20965 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20966 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20967 break;
20968 SDValue NewSplitF64 =
20969 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20970 Op0.getOperand(0));
20971 SDValue Lo = NewSplitF64.getValue(0);
20972 SDValue Hi = NewSplitF64.getValue(1);
20973 APInt SignBit = APInt::getSignMask(32);
20974 if (Op0.getOpcode() == ISD::FNEG) {
20975 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20976 DAG.getConstant(SignBit, DL, MVT::i32));
20977 return DCI.CombineTo(N, Lo, NewHi);
20978 }
20979 assert(Op0.getOpcode() == ISD::FABS);
20980 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20981 DAG.getConstant(~SignBit, DL, MVT::i32));
20982 return DCI.CombineTo(N, Lo, NewHi);
20983 }
20984 case RISCVISD::SLLW:
20985 case RISCVISD::SRAW:
20986 case RISCVISD::SRLW:
20987 case RISCVISD::RORW:
20988 case RISCVISD::ROLW: {
20989 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20990 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20991 SimplifyDemandedLowBitsHelper(1, 5))
20992 return SDValue(N, 0);
20993
20994 break;
20995 }
20996 case RISCVISD::ABSW:
20997 case RISCVISD::CLSW:
20998 case RISCVISD::CLZW:
20999 case RISCVISD::CTZW: {
21000 // Only the lower 32 bits of the first operand are read
21001 if (SimplifyDemandedLowBitsHelper(0, 32))
21002 return SDValue(N, 0);
21003 break;
21004 }
21005 case RISCVISD::FMV_W_X_RV64: {
21006 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
21007 // conversion is unnecessary and can be replaced with the
21008 // FMV_X_ANYEXTW_RV64 operand.
21009 SDValue Op0 = N->getOperand(0);
21010 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
21011 return Op0.getOperand(0);
21012 break;
21013 }
21014 case RISCVISD::FMV_X_ANYEXTH:
21015 case RISCVISD::FMV_X_ANYEXTW_RV64: {
21016 SDLoc DL(N);
21017 SDValue Op0 = N->getOperand(0);
21018 MVT VT = N->getSimpleValueType(0);
21019
21020 // Constant fold.
21021 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
21022 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
21023 return DAG.getConstant(Val, DL, VT);
21024 }
21025
21026 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
21027 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
21028 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
21029 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
21030 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
21031 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
21032 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
21033 assert(Op0.getOperand(0).getValueType() == VT &&
21034 "Unexpected value type!");
21035 return Op0.getOperand(0);
21036 }
21037
21038 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
21039 cast<LoadSDNode>(Op0)->isSimple()) {
21041 auto *LN0 = cast<LoadSDNode>(Op0);
21042 SDValue Load =
21043 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
21044 LN0->getBasePtr(), IVT, LN0->getMemOperand());
21045 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
21046 return Load;
21047 }
21048
21049 // This is a target-specific version of a DAGCombine performed in
21050 // DAGCombiner::visitBITCAST. It performs the equivalent of:
21051 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
21052 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
21053 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
21054 !Op0.getNode()->hasOneUse())
21055 break;
21056 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
21057 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
21058 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
21059 if (Op0.getOpcode() == ISD::FNEG)
21060 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
21061 DAG.getConstant(SignBit, DL, VT));
21062
21063 assert(Op0.getOpcode() == ISD::FABS);
21064 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
21065 DAG.getConstant(~SignBit, DL, VT));
21066 }
21067 case ISD::ABS: {
21068 EVT VT = N->getValueType(0);
21069 SDValue N0 = N->getOperand(0);
21070 // abs (sext) -> zext (abs)
21071 // abs (zext) -> zext (handled elsewhere)
21072 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
21073 SDValue Src = N0.getOperand(0);
21074 SDLoc DL(N);
21075 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
21076 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
21077 }
21078 break;
21079 }
21080 case ISD::ADD: {
21081 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21082 return V;
21083 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
21084 return V;
21085 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
21086 return V;
21087 return performADDCombine(N, DCI, Subtarget);
21088 }
21089 case ISD::SUB: {
21090 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21091 return V;
21092 return performSUBCombine(N, DAG, Subtarget);
21093 }
21094 case ISD::AND:
21095 return performANDCombine(N, DCI, Subtarget);
21096 case ISD::OR: {
21097 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21098 return V;
21099 return performORCombine(N, DCI, Subtarget);
21100 }
21101 case ISD::XOR:
21102 return performXORCombine(N, DAG, Subtarget);
21103 case ISD::MUL:
21104 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21105 return V;
21106 return performMULCombine(N, DAG, DCI, Subtarget);
21107 case ISD::SDIV:
21108 case ISD::UDIV:
21109 case ISD::SREM:
21110 case ISD::UREM:
21111 if (SDValue V = combineBinOpOfZExt(N, DAG))
21112 return V;
21113 break;
21114 case ISD::FMUL: {
21115 using namespace SDPatternMatch;
21116 SDLoc DL(N);
21117 EVT VT = N->getValueType(0);
21118 SDValue X, Y;
21119 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
21120 // hoistFNegAboveFMulFDiv.
21121 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
21123 return DAG.getNode(ISD::FNEG, DL, VT,
21124 DAG.getNode(ISD::FMUL, DL, VT, X, Y, N->getFlags()),
21125 N->getFlags());
21126
21127 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
21128 SDValue N0 = N->getOperand(0);
21129 SDValue N1 = N->getOperand(1);
21130 if (N0->getOpcode() != ISD::FCOPYSIGN)
21131 std::swap(N0, N1);
21132 if (N0->getOpcode() != ISD::FCOPYSIGN)
21133 return SDValue();
21135 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
21136 return SDValue();
21137 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
21138 return SDValue();
21139 SDValue Sign = N0->getOperand(1);
21140 if (Sign.getValueType() != VT)
21141 return SDValue();
21142 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
21143 }
21144 case ISD::FADD:
21145 case ISD::UMAX:
21146 case ISD::UMIN:
21147 case ISD::SMAX:
21148 case ISD::SMIN:
21149 case ISD::FMAXNUM:
21150 case ISD::FMINNUM: {
21151 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
21152 return V;
21153 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
21154 return V;
21155 return SDValue();
21156 }
21157 case ISD::FMA: {
21158 SDValue N0 = N->getOperand(0);
21159 SDValue N1 = N->getOperand(1);
21160 if (N0.getOpcode() != ISD::SPLAT_VECTOR)
21161 std::swap(N0, N1);
21162 if (N0.getOpcode() != ISD::SPLAT_VECTOR)
21163 return SDValue();
21164 SDValue SplatN0 = N0.getOperand(0);
21165 if (SplatN0.getOpcode() != ISD::FNEG || !SplatN0.hasOneUse())
21166 return SDValue();
21167 EVT VT = N->getValueType(0);
21168 SDValue Splat =
21169 DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, SplatN0.getOperand(0));
21170 SDValue Fneg = DAG.getNode(ISD::FNEG, DL, VT, Splat);
21171 return DAG.getNode(ISD::FMA, DL, VT, Fneg, N1, N->getOperand(2));
21172 }
21173 case ISD::SETCC:
21174 return performSETCCCombine(N, DCI, Subtarget);
21176 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
21177 case ISD::ZERO_EXTEND:
21178 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
21179 // type legalization. This is safe because fp_to_uint produces poison if
21180 // it overflows.
21181 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
21182 SDValue Src = N->getOperand(0);
21183 if (Src.getOpcode() == ISD::FP_TO_UINT &&
21184 isTypeLegal(Src.getOperand(0).getValueType()))
21185 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
21186 Src.getOperand(0));
21187 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
21188 isTypeLegal(Src.getOperand(1).getValueType())) {
21189 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
21190 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
21191 Src.getOperand(0), Src.getOperand(1));
21192 DCI.CombineTo(N, Res);
21193 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
21194 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
21195 return SDValue(N, 0); // Return N so it doesn't get rechecked.
21196 }
21197 }
21198 return SDValue();
21199 case RISCVISD::TRUNCATE_VECTOR_VL:
21200 if (SDValue V = combineTruncOfSraSext(N, DAG))
21201 return V;
21202 return combineTruncToVnclip(N, DAG, Subtarget);
21203 case ISD::VP_TRUNCATE: