LLVM 19.0.0git
RISCVISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
22#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#include "RISCVGenSearchableTables.inc"
47} // namespace llvm::RISCV
48
51
52 bool MadeChange = false;
53 while (Position != CurDAG->allnodes_begin()) {
54 SDNode *N = &*--Position;
55 if (N->use_empty())
56 continue;
57
58 SDValue Result;
59 switch (N->getOpcode()) {
60 case ISD::SPLAT_VECTOR: {
61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63 MVT VT = N->getSimpleValueType(0);
64 unsigned Opc =
66 SDLoc DL(N);
67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 SDValue Src = N->getOperand(0);
69 if (VT.isInteger())
70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71 N->getOperand(0));
72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73 break;
74 }
76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77 // load. Done after lowering and combining so that we have a chance to
78 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
79 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 MVT VT = N->getSimpleValueType(0);
81 SDValue Passthru = N->getOperand(0);
82 SDValue Lo = N->getOperand(1);
83 SDValue Hi = N->getOperand(2);
84 SDValue VL = N->getOperand(3);
85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87 "Unexpected VTs!");
89 SDLoc DL(N);
90
91 // Create temporary stack for each expanding node.
92 SDValue StackSlot =
94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
96
97 SDValue Chain = CurDAG->getEntryNode();
98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100 SDValue OffsetSlot =
102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103 Align(8));
104
105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108 SDValue IntID =
109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110 SDValue Ops[] = {Chain,
111 IntID,
112 Passthru,
113 StackSlot,
114 CurDAG->getRegister(RISCV::X0, MVT::i64),
115 VL};
116
118 MVT::i64, MPI, Align(8),
120 break;
121 }
122 }
123
124 if (Result) {
125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126 LLVM_DEBUG(N->dump(CurDAG));
127 LLVM_DEBUG(dbgs() << "\nNew: ");
128 LLVM_DEBUG(Result->dump(CurDAG));
129 LLVM_DEBUG(dbgs() << "\n");
130
132 MadeChange = true;
133 }
134 }
135
136 if (MadeChange)
138}
139
141 HandleSDNode Dummy(CurDAG->getRoot());
143
144 bool MadeChange = false;
145 while (Position != CurDAG->allnodes_begin()) {
146 SDNode *N = &*--Position;
147 // Skip dead nodes and any non-machine opcodes.
148 if (N->use_empty() || !N->isMachineOpcode())
149 continue;
150
151 MadeChange |= doPeepholeSExtW(N);
152
153 // FIXME: This is here only because the VMerge transform doesn't
154 // know how to handle masked true inputs. Once that has been moved
155 // to post-ISEL, this can be deleted as well.
156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157 }
158
159 CurDAG->setRoot(Dummy.getValue());
160
161 MadeChange |= doPeepholeMergeVVMFold();
162
163 // After we're done with everything else, convert IMPLICIT_DEF
164 // passthru operands to NoRegister. This is required to workaround
165 // an optimization deficiency in MachineCSE. This really should
166 // be merged back into each of the patterns (i.e. there's no good
167 // reason not to go directly to NoReg), but is being done this way
168 // to allow easy backporting.
169 MadeChange |= doPeepholeNoRegPassThru();
170
171 if (MadeChange)
173}
174
// Emits one machine node per entry of Seq, chaining them so that each node
// consumes the result of the previous one, and returns the value produced by
// the last node. The chain starts from the X0 register, so if Seq is empty the
// X0 register value itself is returned.
//
// Every node is created with result type VT and with the opcode reported by
// the entry (Inst.getOpcode()); the entry's operand kind decides which
// operands the node receives.
//
// NOTE(review): in this listing the remainder of the parameter list (the
// declaration of `Seq`) and all `case` labels except RISCVMatInt::Imm are not
// visible; confirm them against the full file.
175static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (const RISCVMatInt::Inst &Inst : Seq) {
// The immediate is always built, even for operand kinds that do not use it.
179 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
// Immediate-only form: the node does not read the running value.
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
// (case label not visible) Running value combined with X0.
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
// (case label not visible) Running value used as both operands.
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
// (case label not visible) Running value combined with the immediate.
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202}
203
// Materializes the constant Imm as machine nodes of type VT and returns the
// value of the final node. Three strategies are tried in order:
//   1. a single PseudoMovImm node, when the sequence has exactly two entries
//      and UsePseudoMovImm is set;
//   2. for sequences longer than three entries, a shorter "two register" form
//      built from a sub-sequence plus an SLLI and an add;
//   3. otherwise the plain sequence, emitted by selectImmSeq.
//
// NOTE(review): the declarations of `Seq` and `SeqLo` are not visible in this
// listing; `SeqLo` appears to receive the result of
// RISCVMatInt::generateTwoRegInstSeq — confirm against the full file.
204static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
207
208 // Use a rematerializable pseudo instruction for short sequences if enabled.
209 if (Seq.size() == 2 && UsePseudoMovImm)
210 return SDValue(
211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212 CurDAG->getTargetConstant(Imm, DL, VT)),
213 0);
214
215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216 // worst an LUI+ADDIW. This will require an extra register, but avoids a
217 // constant pool.
218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219 // low and high 32 bits are the same and bit 31 and 63 are set.
220 if (Seq.size() > 3) {
// ShiftAmt and AddOpc are passed to generateTwoRegInstSeq and read below.
221 unsigned ShiftAmt, AddOpc;
223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
// Only use the two-register form when it is strictly shorter: SeqLo plus the
// two extra nodes created below (SLLI and the add) must beat Seq.
224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226
227 SDValue SLLI = SDValue(
228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230 0);
// Combine the unshifted and shifted copies of Lo with the opcode returned in
// AddOpc.
231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232 }
233 }
234
235 // Otherwise, use the original sequence.
236 return selectImmSeq(CurDAG, DL, VT, Seq);
237}
238
240 unsigned NF, RISCVII::VLMUL LMUL) {
241 static const unsigned M1TupleRegClassIDs[] = {
242 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
243 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
244 RISCV::VRN8M1RegClassID};
245 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
246 RISCV::VRN3M2RegClassID,
247 RISCV::VRN4M2RegClassID};
248
249 assert(Regs.size() >= 2 && Regs.size() <= 8);
250
251 unsigned RegClassID;
252 unsigned SubReg0;
253 switch (LMUL) {
254 default:
255 llvm_unreachable("Invalid LMUL.");
260 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
261 "Unexpected subreg numbering");
262 SubReg0 = RISCV::sub_vrm1_0;
263 RegClassID = M1TupleRegClassIDs[NF - 2];
264 break;
266 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
267 "Unexpected subreg numbering");
268 SubReg0 = RISCV::sub_vrm2_0;
269 RegClassID = M2TupleRegClassIDs[NF - 2];
270 break;
272 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
273 "Unexpected subreg numbering");
274 SubReg0 = RISCV::sub_vrm4_0;
275 RegClassID = RISCV::VRN2M4RegClassID;
276 break;
277 }
278
279 SDLoc DL(Regs[0]);
281
282 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
283
284 for (unsigned I = 0; I < Regs.size(); ++I) {
285 Ops.push_back(Regs[I]);
286 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
287 }
288 SDNode *N =
289 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
290 return SDValue(N, 0);
291}
292
294 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
295 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
296 bool IsLoad, MVT *IndexVT) {
297 SDValue Chain = Node->getOperand(0);
298 SDValue Glue;
299
300 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
301
302 if (IsStridedOrIndexed) {
303 Operands.push_back(Node->getOperand(CurOp++)); // Index.
304 if (IndexVT)
305 *IndexVT = Operands.back()->getSimpleValueType(0);
306 }
307
308 if (IsMasked) {
309 // Mask needs to be copied to V0.
310 SDValue Mask = Node->getOperand(CurOp++);
311 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
312 Glue = Chain.getValue(1);
313 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
314 }
315 SDValue VL;
316 selectVLOp(Node->getOperand(CurOp++), VL);
317 Operands.push_back(VL);
318
319 MVT XLenVT = Subtarget->getXLenVT();
320 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
321 Operands.push_back(SEWOp);
322
323 // At the IR layer, all the masked load intrinsics have policy operands,
324 // none of the others do. All have passthru operands. For our pseudos,
325 // all loads have policy operands.
326 if (IsLoad) {
328 if (IsMasked)
329 Policy = Node->getConstantOperandVal(CurOp++);
330 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
331 Operands.push_back(PolicyOp);
332 }
333
334 Operands.push_back(Chain); // Chain.
335 if (Glue)
336 Operands.push_back(Glue);
337}
338
// Selects a segment-load node into the pseudo returned by
// RISCV::getVLSEGPseudo (looked up with FF = false).
//
// Node produces NF vector results followed by one trailing result. The new
// machine node produces a single MVT::Untyped tuple plus an MVT::Other
// result; each original vector result is rewired to a subregister extract of
// the tuple, and the trailing result is rewired to the MVT::Other result.
//
// IsMasked and IsStrided are forwarded both to the operand builder and to the
// pseudo lookup.
//
// NOTE(review): the declarations of `LMUL` and `Operands` are not visible in
// this listing; confirm they are declared before use in the full file.
339void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
340 bool IsStrided) {
341 SDLoc DL(Node);
// All results except the last one are segment fields.
342 unsigned NF = Node->getNumValues() - 1;
343 MVT VT = Node->getSimpleValueType(0);
344 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
346
// Operand reading starts at index 2; presumably operands 0 and 1 are the chain
// and the intrinsic ID — TODO confirm.
347 unsigned CurOp = 2;
349
// The next NF operands are packed into one tuple, which becomes the first
// operand of the pseudo.
350 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
351 Node->op_begin() + CurOp + NF);
352 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
353 Operands.push_back(Merge);
354 CurOp += NF;
355
356 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
357 Operands, /*IsLoad=*/true);
358
359 const RISCV::VLSEGPseudo *P =
360 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
361 static_cast<unsigned>(LMUL));
362 MachineSDNode *Load =
363 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
364
// Carry the memory operand over when Node is a memory node.
365 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
366 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
367
// Replace each vector result with the matching subregister of the tuple.
368 SDValue SuperReg = SDValue(Load, 0);
369 for (unsigned I = 0; I < NF; ++I) {
370 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
371 ReplaceUses(SDValue(Node, I),
372 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
373 }
374
// The trailing result maps to the new node's MVT::Other result.
375 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
376 CurDAG->RemoveDeadNode(Node);
377}
378
// Selects a segment-load node into the pseudo returned by
// RISCV::getVLSEGPseudo with FF = true and Strided = false.
//
// Node produces NF vector results followed by a VL result and a chain. The
// new machine node produces an MVT::Untyped tuple, an XLenVT value and an
// MVT::Other result; the vector results are rewired to subregister extracts
// of the tuple, and VL and chain to the second and third results.
//
// NOTE(review): the declarations of `LMUL` and `Operands` are not visible in
// this listing; confirm they are declared before use in the full file.
379void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
380 SDLoc DL(Node);
381 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
382 MVT VT = Node->getSimpleValueType(0);
383 MVT XLenVT = Subtarget->getXLenVT();
384 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
386
// Operand reading starts at index 2; presumably operands 0 and 1 are the chain
// and the intrinsic ID — TODO confirm.
387 unsigned CurOp = 2;
389
// The next NF operands are packed into one tuple, which becomes the first
// operand of the pseudo.
390 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
391 Node->op_begin() + CurOp + NF);
392 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
393 Operands.push_back(MaskedOff);
394 CurOp += NF;
395
396 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
397 /*IsStridedOrIndexed*/ false, Operands,
398 /*IsLoad=*/true);
399
400 const RISCV::VLSEGPseudo *P =
401 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
402 Log2SEW, static_cast<unsigned>(LMUL));
// Unlike selectVLSEG, the new node has an extra XLenVT result, which the VL
// result of Node is rewired to below.
403 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
404 XLenVT, MVT::Other, Operands);
405
// Carry the memory operand over when Node is a memory node.
406 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
407 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
408
// Replace each vector result with the matching subregister of the tuple.
409 SDValue SuperReg = SDValue(Load, 0);
410 for (unsigned I = 0; I < NF; ++I) {
411 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
412 ReplaceUses(SDValue(Node, I),
413 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
414 }
415
416 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
417 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
418 CurDAG->RemoveDeadNode(Node);
419}
420
// Selects an indexed segment-load node into the pseudo returned by
// RISCV::getVLXSEGPseudo. The pseudo is looked up by NF, IsMasked, IsOrdered,
// the log2 of the index element width, the data LMUL and the index LMUL.
//
// Node produces NF vector results followed by one trailing result. The new
// machine node produces a single MVT::Untyped tuple plus an MVT::Other
// result; each original vector result is rewired to a subregister extract of
// the tuple, and the trailing result is rewired to the MVT::Other result.
//
// Aborts via report_fatal_error when the index elements are 64 bits wide and
// the subtarget is not 64-bit.
//
// NOTE(review): the declarations of `LMUL` and `Operands`, and the condition
// of the "Element count mismatch" assert, are not visible in this listing;
// confirm them against the full file.
421void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
422 bool IsOrdered) {
423 SDLoc DL(Node);
// All results except the last one are segment fields.
424 unsigned NF = Node->getNumValues() - 1;
425 MVT VT = Node->getSimpleValueType(0);
426 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
428
// Operand reading starts at index 2; presumably operands 0 and 1 are the chain
// and the intrinsic ID — TODO confirm.
429 unsigned CurOp = 2;
431
// The next NF operands are packed into one tuple, which becomes the first
// operand of the pseudo.
432 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
433 Node->op_begin() + CurOp + NF);
434 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
435 Operands.push_back(MaskedOff);
436 CurOp += NF;
437
// IndexVT is passed by address and read afterwards, so it is expected to be
// filled in by addVectorLoadStoreOperands.
438 MVT IndexVT;
439 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
440 /*IsStridedOrIndexed*/ true, Operands,
441 /*IsLoad=*/true, &IndexVT);
442
444 "Element count mismatch");
445
446 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
// Log2 == 6 means 64-bit index elements.
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 report_fatal_error("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Load =
456 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
457
// Carry the memory operand over when Node is a memory node.
458 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
459 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
460
// Replace each vector result with the matching subregister of the tuple.
461 SDValue SuperReg = SDValue(Load, 0);
462 for (unsigned I = 0; I < NF; ++I) {
463 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
464 ReplaceUses(SDValue(Node, I),
465 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
466 }
467
// The trailing result maps to the new node's MVT::Other result.
468 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
469 CurDAG->RemoveDeadNode(Node);
470}
471
// Selects a segment-store node into the pseudo returned by
// RISCV::getVSSEGPseudo and replaces Node with the new machine node.
//
// The NF vectors to store are operands [2, 2 + NF) of Node; they are packed
// into one tuple that becomes the first operand of the pseudo. The remaining
// operands are appended by addVectorLoadStoreOperands.
//
// NOTE(review): the declarations of `LMUL` and `Operands` are not visible in
// this listing; confirm they are declared before use in the full file.
472void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
473 bool IsStrided) {
474 SDLoc DL(Node);
// NF is whatever remains after removing the non-data operands: four fixed
// ones (presumably chain, intrinsic ID, base pointer and VL — TODO confirm),
// plus one each for the stride and the mask when present.
475 unsigned NF = Node->getNumOperands() - 4;
476 if (IsStrided)
477 NF--;
478 if (IsMasked)
479 NF--;
// The element type is taken from the first stored vector.
480 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
481 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
483 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
484 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
485
487 Operands.push_back(StoreVal);
488 unsigned CurOp = 2 + NF;
489
// IsLoad is not passed here, so the callee's default applies.
490 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
491 Operands);
492
493 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
494 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
// The new node reuses the type of Node's first result.
495 MachineSDNode *Store =
496 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
497
// Carry the memory operand over when Node is a memory node.
498 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
499 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
500
501 ReplaceNode(Node, Store);
502}
503
// Selects an indexed segment-store node into the pseudo returned by
// RISCV::getVSXSEGPseudo and replaces Node with the new machine node. The
// pseudo is looked up by NF, IsMasked, IsOrdered, the log2 of the index
// element width, the data LMUL and the index LMUL.
//
// The NF vectors to store are operands [2, 2 + NF) of Node; they are packed
// into one tuple that becomes the first operand of the pseudo.
//
// Aborts via report_fatal_error when the index elements are 64 bits wide and
// the subtarget is not 64-bit.
//
// NOTE(review): the declarations of `LMUL` and `Operands`, and the condition
// of the "Element count mismatch" assert, are not visible in this listing;
// confirm them against the full file.
504void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
505 bool IsOrdered) {
506 SDLoc DL(Node);
// NF is whatever remains after removing the non-data operands: five fixed
// ones (presumably chain, intrinsic ID, base pointer, index and VL — TODO
// confirm), plus one for the mask when present.
507 unsigned NF = Node->getNumOperands() - 5;
508 if (IsMasked)
509 --NF;
// The element type is taken from the first stored vector.
510 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
511 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
513 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
514 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
515
517 Operands.push_back(StoreVal);
518 unsigned CurOp = 2 + NF;
519
// IndexVT is passed by address and read afterwards, so it is expected to be
// filled in by addVectorLoadStoreOperands.
520 MVT IndexVT;
521 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
522 /*IsStridedOrIndexed*/ true, Operands,
523 /*IsLoad=*/false, &IndexVT);
524
526 "Element count mismatch");
527
528 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
529 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
// Log2 == 6 means 64-bit index elements.
530 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
531 report_fatal_error("The V extension does not support EEW=64 for index "
532 "values when XLEN=32");
533 }
534 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
535 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
536 static_cast<unsigned>(IndexLMUL));
// The new node reuses the type of Node's first result.
537 MachineSDNode *Store =
538 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
539
// Carry the memory operand over when Node is a memory node.
540 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
541 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
542
543 ReplaceNode(Node, Store);
544}
545
547 if (!Subtarget->hasVInstructions())
548 return;
549
550 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
551
552 SDLoc DL(Node);
553 MVT XLenVT = Subtarget->getXLenVT();
554
555 unsigned IntNo = Node->getConstantOperandVal(0);
556
557 assert((IntNo == Intrinsic::riscv_vsetvli ||
558 IntNo == Intrinsic::riscv_vsetvlimax) &&
559 "Unexpected vsetvli intrinsic");
560
561 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
562 unsigned Offset = (VLMax ? 1 : 2);
563
564 assert(Node->getNumOperands() == Offset + 2 &&
565 "Unexpected number of operands");
566
567 unsigned SEW =
568 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
569 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
570 Node->getConstantOperandVal(Offset + 1) & 0x7);
571
572 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
573 /*MaskAgnostic*/ true);
574 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
575
576 SDValue VLOperand;
577 unsigned Opcode = RISCV::PseudoVSETVLI;
578 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
579 if (auto VLEN = Subtarget->getRealVLen())
580 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
581 VLMax = true;
582 }
583 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
584 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
585 Opcode = RISCV::PseudoVSETVLIX0;
586 } else {
587 VLOperand = Node->getOperand(1);
588
589 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
590 uint64_t AVL = C->getZExtValue();
591 if (isUInt<5>(AVL)) {
592 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
593 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
594 XLenVT, VLImm, VTypeIOp));
595 return;
596 }
597 }
598 }
599
600 ReplaceNode(Node,
601 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
602}
603
605 MVT VT = Node->getSimpleValueType(0);
606 unsigned Opcode = Node->getOpcode();
607 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
608 "Unexpected opcode");
609 SDLoc DL(Node);
610
611 // For operations of the form (x << C1) op C2, check if we can use
612 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
613 SDValue N0 = Node->getOperand(0);
614 SDValue N1 = Node->getOperand(1);
615
616 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
617 if (!Cst)
618 return false;
619
620 int64_t Val = Cst->getSExtValue();
621
622 // Check if immediate can already use ANDI/ORI/XORI.
623 if (isInt<12>(Val))
624 return false;
625
626 SDValue Shift = N0;
627
628 // If Val is simm32 and we have a sext_inreg from i32, then the binop
629 // produces at least 33 sign bits. We can peek through the sext_inreg and use
630 // a SLLIW at the end.
631 bool SignExt = false;
632 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
633 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
634 SignExt = true;
635 Shift = N0.getOperand(0);
636 }
637
638 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
639 return false;
640
641 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
642 if (!ShlCst)
643 return false;
644
645 uint64_t ShAmt = ShlCst->getZExtValue();
646
647 // Make sure that we don't change the operation by removing bits.
648 // This only matters for OR and XOR, AND is unaffected.
649 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
650 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
651 return false;
652
653 int64_t ShiftedVal = Val >> ShAmt;
654 if (!isInt<12>(ShiftedVal))
655 return false;
656
657 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
658 if (SignExt && ShAmt >= 32)
659 return false;
660
661 // Ok, we can reorder to get a smaller immediate.
662 unsigned BinOpc;
663 switch (Opcode) {
664 default: llvm_unreachable("Unexpected opcode");
665 case ISD::AND: BinOpc = RISCV::ANDI; break;
666 case ISD::OR: BinOpc = RISCV::ORI; break;
667 case ISD::XOR: BinOpc = RISCV::XORI; break;
668 }
669
670 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
671
672 SDNode *BinOp =
673 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
674 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
675 SDNode *SLLI =
676 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
677 CurDAG->getTargetConstant(ShAmt, DL, VT));
678 ReplaceNode(Node, SLLI);
679 return true;
680}
681
683 // Only supported with XTHeadBb at the moment.
684 if (!Subtarget->hasVendorXTHeadBb())
685 return false;
686
687 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
688 if (!N1C)
689 return false;
690
691 SDValue N0 = Node->getOperand(0);
692 if (!N0.hasOneUse())
693 return false;
694
695 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
696 MVT VT) {
697 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
698 CurDAG->getTargetConstant(Msb, DL, VT),
699 CurDAG->getTargetConstant(Lsb, DL, VT));
700 };
701
702 SDLoc DL(Node);
703 MVT VT = Node->getSimpleValueType(0);
704 const unsigned RightShAmt = N1C->getZExtValue();
705
706 // Transform (sra (shl X, C1) C2) with C1 < C2
707 // -> (TH.EXT X, msb, lsb)
708 if (N0.getOpcode() == ISD::SHL) {
709 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
710 if (!N01C)
711 return false;
712
713 const unsigned LeftShAmt = N01C->getZExtValue();
714 // Make sure that this is a bitfield extraction (i.e., the shift-right
715 // amount can not be less than the left-shift).
716 if (LeftShAmt > RightShAmt)
717 return false;
718
719 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
720 const unsigned Msb = MsbPlusOne - 1;
721 const unsigned Lsb = RightShAmt - LeftShAmt;
722
723 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724 ReplaceNode(Node, TH_EXT);
725 return true;
726 }
727
728 // Transform (sra (sext_inreg X, _), C) ->
729 // (TH.EXT X, msb, lsb)
730 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
731 unsigned ExtSize =
732 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
733
734 // ExtSize of 32 should use sraiw via tablegen pattern.
735 if (ExtSize == 32)
736 return false;
737
738 const unsigned Msb = ExtSize - 1;
739 const unsigned Lsb = RightShAmt;
740
741 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
742 ReplaceNode(Node, TH_EXT);
743 return true;
744 }
745
746 return false;
747}
748
750 // Target does not support indexed loads.
751 if (!Subtarget->hasVendorXTHeadMemIdx())
752 return false;
753
754 LoadSDNode *Ld = cast<LoadSDNode>(Node);
756 if (AM == ISD::UNINDEXED)
757 return false;
758
759 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
760 if (!C)
761 return false;
762
763 EVT LoadVT = Ld->getMemoryVT();
764 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
765 "Unexpected addressing mode");
766 bool IsPre = AM == ISD::PRE_INC;
767 bool IsPost = AM == ISD::POST_INC;
768 int64_t Offset = C->getSExtValue();
769
770 // The constants that can be encoded in the THeadMemIdx instructions
771 // are of the form (sign_extend(imm5) << imm2).
772 int64_t Shift;
773 for (Shift = 0; Shift < 4; Shift++)
774 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
775 break;
776
777 // Constant cannot be encoded.
778 if (Shift == 4)
779 return false;
780
781 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
782 unsigned Opcode;
783 if (LoadVT == MVT::i8 && IsPre)
784 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
785 else if (LoadVT == MVT::i8 && IsPost)
786 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
787 else if (LoadVT == MVT::i16 && IsPre)
788 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
789 else if (LoadVT == MVT::i16 && IsPost)
790 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
791 else if (LoadVT == MVT::i32 && IsPre)
792 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
793 else if (LoadVT == MVT::i32 && IsPost)
794 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
795 else if (LoadVT == MVT::i64 && IsPre)
796 Opcode = RISCV::TH_LDIB;
797 else if (LoadVT == MVT::i64 && IsPost)
798 Opcode = RISCV::TH_LDIA;
799 else
800 return false;
801
802 EVT Ty = Ld->getOffset().getValueType();
803 SDValue Ops[] = {Ld->getBasePtr(),
804 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
805 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
806 Ld->getChain()};
807 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
808 Ld->getValueType(1), MVT::Other, Ops);
809
810 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
811 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
812
813 ReplaceNode(Node, New);
814
815 return true;
816}
817
819 if (!Subtarget->hasVInstructions())
820 return;
821
822 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
823
824 SDLoc DL(Node);
825 unsigned IntNo = Node->getConstantOperandVal(1);
826
827 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
828 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
829 "Unexpected vsetvli intrinsic");
830
831 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
832 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
833 SDValue SEWOp =
834 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
835 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
836 Node->getOperand(4), Node->getOperand(5),
837 Node->getOperand(8), SEWOp,
838 Node->getOperand(0)};
839
840 unsigned Opcode;
841 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
842 switch (LMulSDNode->getSExtValue()) {
843 case 5:
844 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
845 : RISCV::PseudoVC_I_SE_MF8;
846 break;
847 case 6:
848 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
849 : RISCV::PseudoVC_I_SE_MF4;
850 break;
851 case 7:
852 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
853 : RISCV::PseudoVC_I_SE_MF2;
854 break;
855 case 0:
856 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
857 : RISCV::PseudoVC_I_SE_M1;
858 break;
859 case 1:
860 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
861 : RISCV::PseudoVC_I_SE_M2;
862 break;
863 case 2:
864 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
865 : RISCV::PseudoVC_I_SE_M4;
866 break;
867 case 3:
868 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
869 : RISCV::PseudoVC_I_SE_M8;
870 break;
871 }
872
874 Opcode, DL, Node->getSimpleValueType(0), Operands));
875}
876
878 // If we have a custom node, we have already selected.
879 if (Node->isMachineOpcode()) {
880 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
881 Node->setNodeId(-1);
882 return;
883 }
884
885 // Instruction Selection not handled by the auto-generated tablegen selection
886 // should be handled here.
887 unsigned Opcode = Node->getOpcode();
888 MVT XLenVT = Subtarget->getXLenVT();
889 SDLoc DL(Node);
890 MVT VT = Node->getSimpleValueType(0);
891
892 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
893
894 switch (Opcode) {
895 case ISD::Constant: {
896 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
897 auto *ConstNode = cast<ConstantSDNode>(Node);
898 if (ConstNode->isZero()) {
899 SDValue New =
900 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
901 ReplaceNode(Node, New.getNode());
902 return;
903 }
904 int64_t Imm = ConstNode->getSExtValue();
905 // If only the lower 8 bits are used, try to convert this to a simm6 by
906 // sign-extending bit 7. This is neutral without the C extension, and
907 // allows C.LI to be used if C is present.
908 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
909 Imm = SignExtend64<8>(Imm);
910 // If the upper XLen-16 bits are not used, try to convert this to a simm12
911 // by sign extending bit 15.
912 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
913 hasAllHUsers(Node))
914 Imm = SignExtend64<16>(Imm);
915 // If the upper 32-bits are not used try to convert this into a simm32 by
916 // sign extending bit 31.
917 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
918 Imm = SignExtend64<32>(Imm);
919
920 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
921 return;
922 }
923 case ISD::ConstantFP: {
924 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
925 auto [FPImm, NeedsFNeg] =
926 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
927 VT);
928 if (FPImm >= 0) {
929 unsigned Opc;
930 unsigned FNegOpc;
931 switch (VT.SimpleTy) {
932 default:
933 llvm_unreachable("Unexpected size");
934 case MVT::f16:
935 Opc = RISCV::FLI_H;
936 FNegOpc = RISCV::FSGNJN_H;
937 break;
938 case MVT::f32:
939 Opc = RISCV::FLI_S;
940 FNegOpc = RISCV::FSGNJN_S;
941 break;
942 case MVT::f64:
943 Opc = RISCV::FLI_D;
944 FNegOpc = RISCV::FSGNJN_D;
945 break;
946 }
948 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
949 if (NeedsFNeg)
950 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
951 SDValue(Res, 0));
952
953 ReplaceNode(Node, Res);
954 return;
955 }
956
957 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
958 SDValue Imm;
959 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
960 // create an integer immediate.
961 if (APF.isPosZero() || NegZeroF64)
962 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
963 else
964 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
965 *Subtarget);
966
967 bool HasZdinx = Subtarget->hasStdExtZdinx();
968 bool Is64Bit = Subtarget->is64Bit();
969 unsigned Opc;
970 switch (VT.SimpleTy) {
971 default:
972 llvm_unreachable("Unexpected size");
973 case MVT::bf16:
974 assert(Subtarget->hasStdExtZfbfmin());
975 Opc = RISCV::FMV_H_X;
976 break;
977 case MVT::f16:
978 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
979 break;
980 case MVT::f32:
981 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
982 break;
983 case MVT::f64:
984 // For RV32, we can't move from a GPR, we need to convert instead. This
985 // should only happen for +0.0 and -0.0.
986 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
987 if (Is64Bit)
988 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
989 else
990 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
991 break;
992 }
993
994 SDNode *Res;
995 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
996 Res = CurDAG->getMachineNode(
997 Opc, DL, VT, Imm,
999 else
1000 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1001
1002 // For f64 -0.0, we need to insert a fneg.d idiom.
1003 if (NegZeroF64) {
1004 Opc = RISCV::FSGNJN_D;
1005 if (HasZdinx)
1006 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1007 Res =
1008 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1009 }
1010
1011 ReplaceNode(Node, Res);
1012 return;
1013 }
1015 if (!Subtarget->hasStdExtZdinx())
1016 break;
1017
1018 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1019
1020 SDValue Ops[] = {
1021 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1022 Node->getOperand(0),
1023 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1024 Node->getOperand(1),
1025 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1026
1027 SDNode *N =
1028 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1029 ReplaceNode(Node, N);
1030 return;
1031 }
1032 case RISCVISD::SplitF64: {
1033 if (Subtarget->hasStdExtZdinx()) {
1034 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1035
1036 if (!SDValue(Node, 0).use_empty()) {
1037 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1038 Node->getOperand(0));
1039 ReplaceUses(SDValue(Node, 0), Lo);
1040 }
1041
1042 if (!SDValue(Node, 1).use_empty()) {
1043 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1044 Node->getOperand(0));
1045 ReplaceUses(SDValue(Node, 1), Hi);
1046 }
1047
1048 CurDAG->RemoveDeadNode(Node);
1049 return;
1050 }
1051
1052 if (!Subtarget->hasStdExtZfa())
1053 break;
1054 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1055 "Unexpected subtarget");
1056
1057 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1058 if (!SDValue(Node, 0).use_empty()) {
1059 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1060 Node->getOperand(0));
1061 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1062 }
1063 if (!SDValue(Node, 1).use_empty()) {
1064 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1065 Node->getOperand(0));
1066 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1067 }
1068
1069 CurDAG->RemoveDeadNode(Node);
1070 return;
1071 }
1072 case ISD::SHL: {
1073 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1074 if (!N1C)
1075 break;
1076 SDValue N0 = Node->getOperand(0);
1077 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1078 !isa<ConstantSDNode>(N0.getOperand(1)))
1079 break;
1080 unsigned ShAmt = N1C->getZExtValue();
1081 uint64_t Mask = N0.getConstantOperandVal(1);
1082
1083 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1084 // 32 leading zeros and C3 trailing zeros.
1085 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1086 unsigned XLen = Subtarget->getXLen();
1087 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1088 unsigned TrailingZeros = llvm::countr_zero(Mask);
1089 if (TrailingZeros > 0 && LeadingZeros == 32) {
1090 SDNode *SRLIW = CurDAG->getMachineNode(
1091 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1092 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1093 SDNode *SLLI = CurDAG->getMachineNode(
1094 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1095 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1096 ReplaceNode(Node, SLLI);
1097 return;
1098 }
1099 }
1100 break;
1101 }
1102 case ISD::SRL: {
1103 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1104 if (!N1C)
1105 break;
1106 SDValue N0 = Node->getOperand(0);
1107 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1108 break;
1109 unsigned ShAmt = N1C->getZExtValue();
1110 uint64_t Mask = N0.getConstantOperandVal(1);
1111
1112 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1113 // 32 leading zeros and C3 trailing zeros.
1114 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1115 unsigned XLen = Subtarget->getXLen();
1116 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1117 unsigned TrailingZeros = llvm::countr_zero(Mask);
1118 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1119 SDNode *SRLIW = CurDAG->getMachineNode(
1120 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1121 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1122 SDNode *SLLI = CurDAG->getMachineNode(
1123 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1124 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1125 ReplaceNode(Node, SLLI);
1126 return;
1127 }
1128 }
1129
1130 // Optimize (srl (and X, C2), C) ->
1131 // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1132 // Where C2 is a mask with C3 trailing ones.
1133 // Taking into account that the C2 may have had lower bits unset by
1134 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1135 // This pattern occurs when type legalizing right shifts for types with
1136 // less than XLen bits.
1137 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1138 if (!isMask_64(Mask))
1139 break;
1140 unsigned TrailingOnes = llvm::countr_one(Mask);
1141 if (ShAmt >= TrailingOnes)
1142 break;
1143 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1144 if (TrailingOnes == 32) {
1145 SDNode *SRLI = CurDAG->getMachineNode(
1146 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1147 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1148 ReplaceNode(Node, SRLI);
1149 return;
1150 }
1151
1152 // Only do the remaining transforms if the AND has one use.
1153 if (!N0.hasOneUse())
1154 break;
1155
1156 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1157 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1158 SDNode *BEXTI = CurDAG->getMachineNode(
1159 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1160 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1161 ReplaceNode(Node, BEXTI);
1162 return;
1163 }
1164
1165 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1166 SDNode *SLLI =
1167 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1168 CurDAG->getTargetConstant(LShAmt, DL, VT));
1169 SDNode *SRLI = CurDAG->getMachineNode(
1170 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1171 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1172 ReplaceNode(Node, SRLI);
1173 return;
1174 }
1175 case ISD::SRA: {
1176 if (trySignedBitfieldExtract(Node))
1177 return;
1178
1179 // Optimize (sra (sext_inreg X, i16), C) ->
1180 // (srai (slli X, (XLen-16)), (XLen-16) + C)
1181 // And (sra (sext_inreg X, i8), C) ->
1182 // (srai (slli X, (XLen-8)), (XLen-8) + C)
1183 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1184 // This transform matches the code we get without Zbb. The shifts are more
1185 // compressible, and this can help expose CSE opportunities in the sdiv by
1186 // constant optimization.
1187 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1188 if (!N1C)
1189 break;
1190 SDValue N0 = Node->getOperand(0);
1191 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1192 break;
1193 unsigned ShAmt = N1C->getZExtValue();
1194 unsigned ExtSize =
1195 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1196 // ExtSize of 32 should use sraiw via tablegen pattern.
1197 if (ExtSize >= 32 || ShAmt >= ExtSize)
1198 break;
1199 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1200 SDNode *SLLI =
1201 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1202 CurDAG->getTargetConstant(LShAmt, DL, VT));
1203 SDNode *SRAI = CurDAG->getMachineNode(
1204 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1205 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1206 ReplaceNode(Node, SRAI);
1207 return;
1208 }
1209 case ISD::OR:
1210 case ISD::XOR:
1211 if (tryShrinkShlLogicImm(Node))
1212 return;
1213
1214 break;
1215 case ISD::AND: {
1216 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1217 if (!N1C)
1218 break;
1219 uint64_t C1 = N1C->getZExtValue();
1220 const bool isC1Mask = isMask_64(C1);
1221 const bool isC1ANDI = isInt<12>(C1);
1222
1223 SDValue N0 = Node->getOperand(0);
1224
1225 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1226 SDValue X, unsigned Msb,
1227 unsigned Lsb) {
1228 if (!Subtarget->hasVendorXTHeadBb())
1229 return false;
1230
1231 SDNode *TH_EXTU = CurDAG->getMachineNode(
1232 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1233 CurDAG->getTargetConstant(Lsb, DL, VT));
1234 ReplaceNode(Node, TH_EXTU);
1235 return true;
1236 };
1237
1238 bool LeftShift = N0.getOpcode() == ISD::SHL;
1239 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1240 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1241 if (!C)
1242 break;
1243 unsigned C2 = C->getZExtValue();
1244 unsigned XLen = Subtarget->getXLen();
1245 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1246
1247 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1248 // shift pair might offer more compression opportunities.
1249 // TODO: We could check for C extension here, but we don't have many lit
1250 // tests with the C extension enabled so not checking gets better
1251 // coverage.
1252 // TODO: What if ANDI is faster than a shift?
1253 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1254
1255 // Clear irrelevant bits in the mask.
1256 if (LeftShift)
1257 C1 &= maskTrailingZeros<uint64_t>(C2);
1258 else
1259 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1260
1261 // Some transforms should only be done if the shift has a single use or
1262 // the AND would become (srli (slli X, 32), 32)
1263 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1264
1265 SDValue X = N0.getOperand(0);
1266
1267 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1268 // with c3 leading zeros.
1269 if (!LeftShift && isC1Mask) {
1270 unsigned Leading = XLen - llvm::bit_width(C1);
1271 if (C2 < Leading) {
1272 // If the number of leading zeros is C2+32 this can be SRLIW.
1273 if (C2 + 32 == Leading) {
1274 SDNode *SRLIW = CurDAG->getMachineNode(
1275 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1276 ReplaceNode(Node, SRLIW);
1277 return;
1278 }
1279
1280 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1281 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1282 //
1283 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1284 // legalized and goes through DAG combine.
1285 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1288 SDNode *SRAIW =
1289 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1290 CurDAG->getTargetConstant(31, DL, VT));
1291 SDNode *SRLIW = CurDAG->getMachineNode(
1292 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1293 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1294 ReplaceNode(Node, SRLIW);
1295 return;
1296 }
1297
1298 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1299 // available.
1300 // Transform (and (srl x, C2), C1)
1301 // -> (<bfextract> x, msb, lsb)
1302 //
1303 // Make sure to keep this below the SRLIW cases, as we always want to
1304 // prefer the more common instruction.
1305 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1306 const unsigned Lsb = C2;
1307 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1308 return;
1309
1310 // (srli (slli x, c3-c2), c3).
1311 // Skip if we could use (zext.w (sraiw X, C2)).
1312 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1313 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1314 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1315 // Also Skip if we can use bexti or th.tst.
1316 Skip |= HasBitTest && Leading == XLen - 1;
1317 if (OneUseOrZExtW && !Skip) {
1318 SDNode *SLLI = CurDAG->getMachineNode(
1319 RISCV::SLLI, DL, VT, X,
1320 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1321 SDNode *SRLI = CurDAG->getMachineNode(
1322 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1323 CurDAG->getTargetConstant(Leading, DL, VT));
1324 ReplaceNode(Node, SRLI);
1325 return;
1326 }
1327 }
1328 }
1329
1330 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1331 // shifted by c2 bits with c3 leading zeros.
1332 if (LeftShift && isShiftedMask_64(C1)) {
1333 unsigned Leading = XLen - llvm::bit_width(C1);
1334
1335 if (C2 + Leading < XLen &&
1336 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1337 // Use slli.uw when possible.
1338 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1339 SDNode *SLLI_UW =
1340 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1341 CurDAG->getTargetConstant(C2, DL, VT));
1342 ReplaceNode(Node, SLLI_UW);
1343 return;
1344 }
1345
1346 // (srli (slli x, c2+c3), c3)
1347 if (OneUseOrZExtW && !IsCANDI) {
1348 SDNode *SLLI = CurDAG->getMachineNode(
1349 RISCV::SLLI, DL, VT, X,
1350 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1351 SDNode *SRLI = CurDAG->getMachineNode(
1352 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1353 CurDAG->getTargetConstant(Leading, DL, VT));
1354 ReplaceNode(Node, SRLI);
1355 return;
1356 }
1357 }
1358 }
1359
1360 // Turn (and (srl x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1361 // shifted mask with c2 leading zeros and c3 trailing zeros.
1362 if (!LeftShift && isShiftedMask_64(C1)) {
1363 unsigned Leading = XLen - llvm::bit_width(C1);
1364 unsigned Trailing = llvm::countr_zero(C1);
1365 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1366 !IsCANDI) {
1367 unsigned SrliOpc = RISCV::SRLI;
1368 // If the input is zexti32 we should use SRLIW.
1369 if (X.getOpcode() == ISD::AND &&
1370 isa<ConstantSDNode>(X.getOperand(1)) &&
1371 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1372 SrliOpc = RISCV::SRLIW;
1373 X = X.getOperand(0);
1374 }
1375 SDNode *SRLI = CurDAG->getMachineNode(
1376 SrliOpc, DL, VT, X,
1377 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1378 SDNode *SLLI = CurDAG->getMachineNode(
1379 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1380 CurDAG->getTargetConstant(Trailing, DL, VT));
1381 ReplaceNode(Node, SLLI);
1382 return;
1383 }
1384 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1385 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1386 OneUseOrZExtW && !IsCANDI) {
1387 SDNode *SRLIW = CurDAG->getMachineNode(
1388 RISCV::SRLIW, DL, VT, X,
1389 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1390 SDNode *SLLI = CurDAG->getMachineNode(
1391 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1392 CurDAG->getTargetConstant(Trailing, DL, VT));
1393 ReplaceNode(Node, SLLI);
1394 return;
1395 }
1396 }
1397
1398 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1399 // shifted mask with no leading zeros and c3 trailing zeros.
1400 if (LeftShift && isShiftedMask_64(C1)) {
1401 unsigned Leading = XLen - llvm::bit_width(C1);
1402 unsigned Trailing = llvm::countr_zero(C1);
1403 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1404 SDNode *SRLI = CurDAG->getMachineNode(
1405 RISCV::SRLI, DL, VT, X,
1406 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1407 SDNode *SLLI = CurDAG->getMachineNode(
1408 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1409 CurDAG->getTargetConstant(Trailing, DL, VT));
1410 ReplaceNode(Node, SLLI);
1411 return;
1412 }
1413 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1414 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1415 SDNode *SRLIW = CurDAG->getMachineNode(
1416 RISCV::SRLIW, DL, VT, X,
1417 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1418 SDNode *SLLI = CurDAG->getMachineNode(
1419 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1420 CurDAG->getTargetConstant(Trailing, DL, VT));
1421 ReplaceNode(Node, SLLI);
1422 return;
1423 }
1424
1425 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1426 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1427 Subtarget->hasStdExtZba()) {
1428 SDNode *SRLI = CurDAG->getMachineNode(
1429 RISCV::SRLI, DL, VT, X,
1430 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1431 SDNode *SLLI_UW = CurDAG->getMachineNode(
1432 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1433 CurDAG->getTargetConstant(Trailing, DL, VT));
1434 ReplaceNode(Node, SLLI_UW);
1435 return;
1436 }
1437 }
1438 }
1439
1440 // If C1 masks off the upper bits only (but can't be formed as an
1441 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1442 // available.
1443 // Transform (and x, C1)
1444 // -> (<bfextract> x, msb, lsb)
1445 if (isC1Mask && !isC1ANDI) {
1446 const unsigned Msb = llvm::bit_width(C1) - 1;
1447 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1448 return;
1449 }
1450
1451 if (tryShrinkShlLogicImm(Node))
1452 return;
1453
1454 break;
1455 }
1456 case ISD::MUL: {
1457 // Special case for calculating (mul (and X, C2), C1) where the full product
1458 // fits in XLen bits. We can shift X left by the number of leading zeros in
1459 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1460 // product has XLen trailing zeros, putting it in the output of MULHU. This
1461 // can avoid materializing a constant in a register for C2.
1462
1463 // RHS should be a constant.
1464 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1465 if (!N1C || !N1C->hasOneUse())
1466 break;
1467
1468 // LHS should be an AND with constant.
1469 SDValue N0 = Node->getOperand(0);
1470 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1471 break;
1472
1474
1475 // Constant should be a mask.
1476 if (!isMask_64(C2))
1477 break;
1478
1479 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1480 // multiple users or the constant is a simm12. This prevents inserting a
1481 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1482 // make it more costly to materialize. Otherwise, using a SLLI might allow
1483 // it to be compressed.
1484 bool IsANDIOrZExt =
1485 isInt<12>(C2) ||
1486 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1487 // With XTHeadBb, we can use TH.EXTU.
1488 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1489 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1490 break;
1491 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1492 // the constant is a simm32.
1493 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1494 // With XTHeadBb, we can use TH.EXTU.
1495 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1496 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1497 break;
1498
1499 // We need to shift left the AND input and C1 by a total of XLen bits.
1500
1501 // How far left do we need to shift the AND input?
1502 unsigned XLen = Subtarget->getXLen();
1503 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1504
1505 // The constant gets shifted by the remaining amount unless that would
1506 // shift bits out.
1507 uint64_t C1 = N1C->getZExtValue();
1508 unsigned ConstantShift = XLen - LeadingZeros;
1509 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1510 break;
1511
1512 uint64_t ShiftedC1 = C1 << ConstantShift;
1513 // If this RV32, we need to sign extend the constant.
1514 if (XLen == 32)
1515 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1516
1517 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1518 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1519 SDNode *SLLI =
1520 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1521 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1522 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1523 SDValue(SLLI, 0), SDValue(Imm, 0));
1524 ReplaceNode(Node, MULHU);
1525 return;
1526 }
1527 case ISD::LOAD: {
1528 if (tryIndexedLoad(Node))
1529 return;
1530 break;
1531 }
1533 unsigned IntNo = Node->getConstantOperandVal(0);
1534 switch (IntNo) {
1535 // By default we do not custom select any intrinsic.
1536 default:
1537 break;
1538 case Intrinsic::riscv_vmsgeu:
1539 case Intrinsic::riscv_vmsge: {
1540 SDValue Src1 = Node->getOperand(1);
1541 SDValue Src2 = Node->getOperand(2);
1542 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1543 bool IsCmpUnsignedZero = false;
1544 // Only custom select scalar second operand.
1545 if (Src2.getValueType() != XLenVT)
1546 break;
1547 // Small constants are handled with patterns.
1548 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1549 int64_t CVal = C->getSExtValue();
1550 if (CVal >= -15 && CVal <= 16) {
1551 if (!IsUnsigned || CVal != 0)
1552 break;
1553 IsCmpUnsignedZero = true;
1554 }
1555 }
1556 MVT Src1VT = Src1.getSimpleValueType();
1557 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1558 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1559 default:
1560 llvm_unreachable("Unexpected LMUL!");
1561#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1562 case RISCVII::VLMUL::lmulenum: \
1563 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1564 : RISCV::PseudoVMSLT_VX_##suffix; \
1565 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1566 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1567 break;
1568 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1569 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1570 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1572 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1573 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1574 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1575#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1576 }
1578 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1579 SDValue VL;
1580 selectVLOp(Node->getOperand(3), VL);
1581
1582 // If vmsgeu with 0 immediate, expand it to vmset.
1583 if (IsCmpUnsignedZero) {
1584 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1585 return;
1586 }
1587
1588 // Expand to
1589 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1590 SDValue Cmp = SDValue(
1591 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1592 0);
1593 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1594 {Cmp, Cmp, VL, SEW}));
1595 return;
1596 }
1597 case Intrinsic::riscv_vmsgeu_mask:
1598 case Intrinsic::riscv_vmsge_mask: {
1599 SDValue Src1 = Node->getOperand(2);
1600 SDValue Src2 = Node->getOperand(3);
1601 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1602 bool IsCmpUnsignedZero = false;
1603 // Only custom select scalar second operand.
1604 if (Src2.getValueType() != XLenVT)
1605 break;
1606 // Small constants are handled with patterns.
1607 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1608 int64_t CVal = C->getSExtValue();
1609 if (CVal >= -15 && CVal <= 16) {
1610 if (!IsUnsigned || CVal != 0)
1611 break;
1612 IsCmpUnsignedZero = true;
1613 }
1614 }
1615 MVT Src1VT = Src1.getSimpleValueType();
1616 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1617 VMOROpcode;
1618 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1619 default:
1620 llvm_unreachable("Unexpected LMUL!");
1621#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1622 case RISCVII::VLMUL::lmulenum: \
1623 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1624 : RISCV::PseudoVMSLT_VX_##suffix; \
1625 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1626 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1627 break;
1628 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1629 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1630 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1631 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1632 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1633 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1634 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1635#undef CASE_VMSLT_OPCODES
1636 }
1637 // Mask operations use the LMUL from the mask type.
1638 switch (RISCVTargetLowering::getLMUL(VT)) {
1639 default:
1640 llvm_unreachable("Unexpected LMUL!");
1641#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1642 case RISCVII::VLMUL::lmulenum: \
1643 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1644 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1645 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1646 break;
1647 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1648 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1649 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1654#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1655 }
1657 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1658 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1659 SDValue VL;
1660 selectVLOp(Node->getOperand(5), VL);
1661 SDValue MaskedOff = Node->getOperand(1);
1662 SDValue Mask = Node->getOperand(4);
1663
1664 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1665 if (IsCmpUnsignedZero) {
1666 // We don't need vmor if the MaskedOff and the Mask are the same
1667 // value.
1668 if (Mask == MaskedOff) {
1669 ReplaceUses(Node, Mask.getNode());
1670 return;
1671 }
1672 ReplaceNode(Node,
1673 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1674 {Mask, MaskedOff, VL, MaskSEW}));
1675 return;
1676 }
1677
1678 // If the MaskedOff value and the Mask are the same value use
1679 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1680 // This avoids needing to copy v0 to vd before starting the next sequence.
1681 if (Mask == MaskedOff) {
1682 SDValue Cmp = SDValue(
1683 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1684 0);
1685 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1686 {Mask, Cmp, VL, MaskSEW}));
1687 return;
1688 }
1689
1690 // Mask needs to be copied to V0.
1692 RISCV::V0, Mask, SDValue());
1693 SDValue Glue = Chain.getValue(1);
1694 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1695
1696 // Otherwise use
1697 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1698 // The result is mask undisturbed.
1699 // We use the same instructions to emulate mask agnostic behavior, because
1700 // the agnostic result can be either undisturbed or all 1.
1701 SDValue Cmp = SDValue(
1702 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1703 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1704 0);
1705 // vmxor.mm vd, vd, v0 is used to update active value.
1706 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1707 {Cmp, Mask, VL, MaskSEW}));
1708 return;
1709 }
1710 case Intrinsic::riscv_vsetvli:
1711 case Intrinsic::riscv_vsetvlimax:
1712 return selectVSETVLI(Node);
1713 }
1714 break;
1715 }
1717 unsigned IntNo = Node->getConstantOperandVal(1);
1718 switch (IntNo) {
1719 // By default we do not custom select any intrinsic.
1720 default:
1721 break;
1722 case Intrinsic::riscv_vlseg2:
1723 case Intrinsic::riscv_vlseg3:
1724 case Intrinsic::riscv_vlseg4:
1725 case Intrinsic::riscv_vlseg5:
1726 case Intrinsic::riscv_vlseg6:
1727 case Intrinsic::riscv_vlseg7:
1728 case Intrinsic::riscv_vlseg8: {
1729 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1730 return;
1731 }
1732 case Intrinsic::riscv_vlseg2_mask:
1733 case Intrinsic::riscv_vlseg3_mask:
1734 case Intrinsic::riscv_vlseg4_mask:
1735 case Intrinsic::riscv_vlseg5_mask:
1736 case Intrinsic::riscv_vlseg6_mask:
1737 case Intrinsic::riscv_vlseg7_mask:
1738 case Intrinsic::riscv_vlseg8_mask: {
1739 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1740 return;
1741 }
1742 case Intrinsic::riscv_vlsseg2:
1743 case Intrinsic::riscv_vlsseg3:
1744 case Intrinsic::riscv_vlsseg4:
1745 case Intrinsic::riscv_vlsseg5:
1746 case Intrinsic::riscv_vlsseg6:
1747 case Intrinsic::riscv_vlsseg7:
1748 case Intrinsic::riscv_vlsseg8: {
1749 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1750 return;
1751 }
1752 case Intrinsic::riscv_vlsseg2_mask:
1753 case Intrinsic::riscv_vlsseg3_mask:
1754 case Intrinsic::riscv_vlsseg4_mask:
1755 case Intrinsic::riscv_vlsseg5_mask:
1756 case Intrinsic::riscv_vlsseg6_mask:
1757 case Intrinsic::riscv_vlsseg7_mask:
1758 case Intrinsic::riscv_vlsseg8_mask: {
1759 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1760 return;
1761 }
1762 case Intrinsic::riscv_vloxseg2:
1763 case Intrinsic::riscv_vloxseg3:
1764 case Intrinsic::riscv_vloxseg4:
1765 case Intrinsic::riscv_vloxseg5:
1766 case Intrinsic::riscv_vloxseg6:
1767 case Intrinsic::riscv_vloxseg7:
1768 case Intrinsic::riscv_vloxseg8:
1769 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1770 return;
1771 case Intrinsic::riscv_vluxseg2:
1772 case Intrinsic::riscv_vluxseg3:
1773 case Intrinsic::riscv_vluxseg4:
1774 case Intrinsic::riscv_vluxseg5:
1775 case Intrinsic::riscv_vluxseg6:
1776 case Intrinsic::riscv_vluxseg7:
1777 case Intrinsic::riscv_vluxseg8:
1778 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1779 return;
1780 case Intrinsic::riscv_vloxseg2_mask:
1781 case Intrinsic::riscv_vloxseg3_mask:
1782 case Intrinsic::riscv_vloxseg4_mask:
1783 case Intrinsic::riscv_vloxseg5_mask:
1784 case Intrinsic::riscv_vloxseg6_mask:
1785 case Intrinsic::riscv_vloxseg7_mask:
1786 case Intrinsic::riscv_vloxseg8_mask:
1787 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1788 return;
1789 case Intrinsic::riscv_vluxseg2_mask:
1790 case Intrinsic::riscv_vluxseg3_mask:
1791 case Intrinsic::riscv_vluxseg4_mask:
1792 case Intrinsic::riscv_vluxseg5_mask:
1793 case Intrinsic::riscv_vluxseg6_mask:
1794 case Intrinsic::riscv_vluxseg7_mask:
1795 case Intrinsic::riscv_vluxseg8_mask:
1796 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1797 return;
1798 case Intrinsic::riscv_vlseg8ff:
1799 case Intrinsic::riscv_vlseg7ff:
1800 case Intrinsic::riscv_vlseg6ff:
1801 case Intrinsic::riscv_vlseg5ff:
1802 case Intrinsic::riscv_vlseg4ff:
1803 case Intrinsic::riscv_vlseg3ff:
1804 case Intrinsic::riscv_vlseg2ff: {
1805 selectVLSEGFF(Node, /*IsMasked*/ false);
1806 return;
1807 }
1808 case Intrinsic::riscv_vlseg8ff_mask:
1809 case Intrinsic::riscv_vlseg7ff_mask:
1810 case Intrinsic::riscv_vlseg6ff_mask:
1811 case Intrinsic::riscv_vlseg5ff_mask:
1812 case Intrinsic::riscv_vlseg4ff_mask:
1813 case Intrinsic::riscv_vlseg3ff_mask:
1814 case Intrinsic::riscv_vlseg2ff_mask: {
1815 selectVLSEGFF(Node, /*IsMasked*/ true);
1816 return;
1817 }
1818 case Intrinsic::riscv_vloxei:
1819 case Intrinsic::riscv_vloxei_mask:
1820 case Intrinsic::riscv_vluxei:
1821 case Intrinsic::riscv_vluxei_mask: {
1822 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1823 IntNo == Intrinsic::riscv_vluxei_mask;
1824 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1825 IntNo == Intrinsic::riscv_vloxei_mask;
1826
1827 MVT VT = Node->getSimpleValueType(0);
1828 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1829
1830 unsigned CurOp = 2;
1832 Operands.push_back(Node->getOperand(CurOp++));
1833
1834 MVT IndexVT;
1835 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1836 /*IsStridedOrIndexed*/ true, Operands,
1837 /*IsLoad=*/true, &IndexVT);
1838
1840 "Element count mismatch");
1841
1843 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1844 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1845 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1846 report_fatal_error("The V extension does not support EEW=64 for index "
1847 "values when XLEN=32");
1848 }
1849 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1850 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1851 static_cast<unsigned>(IndexLMUL));
1852 MachineSDNode *Load =
1853 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1854
1855 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1856 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1857
1858 ReplaceNode(Node, Load);
1859 return;
1860 }
1861 case Intrinsic::riscv_vlm:
1862 case Intrinsic::riscv_vle:
1863 case Intrinsic::riscv_vle_mask:
1864 case Intrinsic::riscv_vlse:
1865 case Intrinsic::riscv_vlse_mask: {
1866 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1867 IntNo == Intrinsic::riscv_vlse_mask;
1868 bool IsStrided =
1869 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1870
1871 MVT VT = Node->getSimpleValueType(0);
1872 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1873
1874 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1875 // operand at the IR level. In pseudos, it has both a policy and a
1876 // passthru operand. The passthru operand is needed to track the
1877 // "tail undefined" state, and the policy is there just
1878 // for consistency - it will always be "don't care" for the
1879 // unmasked form.
1880 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1881 unsigned CurOp = 2;
1883 if (HasPassthruOperand)
1884 Operands.push_back(Node->getOperand(CurOp++));
1885 else {
1886 // We eagerly lower to implicit_def (instead of undef), as we
1887 // otherwise fail to select nodes such as: nxv1i1 = undef
1888 SDNode *Passthru =
1889 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1890 Operands.push_back(SDValue(Passthru, 0));
1891 }
1892 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1893 Operands, /*IsLoad=*/true);
1894
1896 const RISCV::VLEPseudo *P =
1897 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1898 static_cast<unsigned>(LMUL));
1899 MachineSDNode *Load =
1900 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1901
1902 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1903 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1904
1905 ReplaceNode(Node, Load);
1906 return;
1907 }
1908 case Intrinsic::riscv_vleff:
1909 case Intrinsic::riscv_vleff_mask: {
1910 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1911
1912 MVT VT = Node->getSimpleValueType(0);
1913 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1914
1915 unsigned CurOp = 2;
1917 Operands.push_back(Node->getOperand(CurOp++));
1918 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1919 /*IsStridedOrIndexed*/ false, Operands,
1920 /*IsLoad=*/true);
1921
1923 const RISCV::VLEPseudo *P =
1924 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1925 Log2SEW, static_cast<unsigned>(LMUL));
1927 P->Pseudo, DL, Node->getVTList(), Operands);
1928 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1929 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1930
1931 ReplaceNode(Node, Load);
1932 return;
1933 }
1934 }
1935 break;
1936 }
1937 case ISD::INTRINSIC_VOID: {
1938 unsigned IntNo = Node->getConstantOperandVal(1);
1939 switch (IntNo) {
1940 case Intrinsic::riscv_vsseg2:
1941 case Intrinsic::riscv_vsseg3:
1942 case Intrinsic::riscv_vsseg4:
1943 case Intrinsic::riscv_vsseg5:
1944 case Intrinsic::riscv_vsseg6:
1945 case Intrinsic::riscv_vsseg7:
1946 case Intrinsic::riscv_vsseg8: {
1947 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1948 return;
1949 }
1950 case Intrinsic::riscv_vsseg2_mask:
1951 case Intrinsic::riscv_vsseg3_mask:
1952 case Intrinsic::riscv_vsseg4_mask:
1953 case Intrinsic::riscv_vsseg5_mask:
1954 case Intrinsic::riscv_vsseg6_mask:
1955 case Intrinsic::riscv_vsseg7_mask:
1956 case Intrinsic::riscv_vsseg8_mask: {
1957 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1958 return;
1959 }
1960 case Intrinsic::riscv_vssseg2:
1961 case Intrinsic::riscv_vssseg3:
1962 case Intrinsic::riscv_vssseg4:
1963 case Intrinsic::riscv_vssseg5:
1964 case Intrinsic::riscv_vssseg6:
1965 case Intrinsic::riscv_vssseg7:
1966 case Intrinsic::riscv_vssseg8: {
1967 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1968 return;
1969 }
1970 case Intrinsic::riscv_vssseg2_mask:
1971 case Intrinsic::riscv_vssseg3_mask:
1972 case Intrinsic::riscv_vssseg4_mask:
1973 case Intrinsic::riscv_vssseg5_mask:
1974 case Intrinsic::riscv_vssseg6_mask:
1975 case Intrinsic::riscv_vssseg7_mask:
1976 case Intrinsic::riscv_vssseg8_mask: {
1977 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1978 return;
1979 }
1980 case Intrinsic::riscv_vsoxseg2:
1981 case Intrinsic::riscv_vsoxseg3:
1982 case Intrinsic::riscv_vsoxseg4:
1983 case Intrinsic::riscv_vsoxseg5:
1984 case Intrinsic::riscv_vsoxseg6:
1985 case Intrinsic::riscv_vsoxseg7:
1986 case Intrinsic::riscv_vsoxseg8:
1987 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1988 return;
1989 case Intrinsic::riscv_vsuxseg2:
1990 case Intrinsic::riscv_vsuxseg3:
1991 case Intrinsic::riscv_vsuxseg4:
1992 case Intrinsic::riscv_vsuxseg5:
1993 case Intrinsic::riscv_vsuxseg6:
1994 case Intrinsic::riscv_vsuxseg7:
1995 case Intrinsic::riscv_vsuxseg8:
1996 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1997 return;
1998 case Intrinsic::riscv_vsoxseg2_mask:
1999 case Intrinsic::riscv_vsoxseg3_mask:
2000 case Intrinsic::riscv_vsoxseg4_mask:
2001 case Intrinsic::riscv_vsoxseg5_mask:
2002 case Intrinsic::riscv_vsoxseg6_mask:
2003 case Intrinsic::riscv_vsoxseg7_mask:
2004 case Intrinsic::riscv_vsoxseg8_mask:
2005 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
2006 return;
2007 case Intrinsic::riscv_vsuxseg2_mask:
2008 case Intrinsic::riscv_vsuxseg3_mask:
2009 case Intrinsic::riscv_vsuxseg4_mask:
2010 case Intrinsic::riscv_vsuxseg5_mask:
2011 case Intrinsic::riscv_vsuxseg6_mask:
2012 case Intrinsic::riscv_vsuxseg7_mask:
2013 case Intrinsic::riscv_vsuxseg8_mask:
2014 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
2015 return;
2016 case Intrinsic::riscv_vsoxei:
2017 case Intrinsic::riscv_vsoxei_mask:
2018 case Intrinsic::riscv_vsuxei:
2019 case Intrinsic::riscv_vsuxei_mask: {
2020 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2021 IntNo == Intrinsic::riscv_vsuxei_mask;
2022 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2023 IntNo == Intrinsic::riscv_vsoxei_mask;
2024
2025 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2026 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2027
2028 unsigned CurOp = 2;
2030 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2031
2032 MVT IndexVT;
2033 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2034 /*IsStridedOrIndexed*/ true, Operands,
2035 /*IsLoad=*/false, &IndexVT);
2036
2038 "Element count mismatch");
2039
2041 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2042 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2043 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2044 report_fatal_error("The V extension does not support EEW=64 for index "
2045 "values when XLEN=32");
2046 }
2047 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2048 IsMasked, IsOrdered, IndexLog2EEW,
2049 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2050 MachineSDNode *Store =
2051 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2052
2053 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2054 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2055
2056 ReplaceNode(Node, Store);
2057 return;
2058 }
2059 case Intrinsic::riscv_vsm:
2060 case Intrinsic::riscv_vse:
2061 case Intrinsic::riscv_vse_mask:
2062 case Intrinsic::riscv_vsse:
2063 case Intrinsic::riscv_vsse_mask: {
2064 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2065 IntNo == Intrinsic::riscv_vsse_mask;
2066 bool IsStrided =
2067 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2068
2069 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2070 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2071
2072 unsigned CurOp = 2;
2074 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2075
2076 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2077 Operands);
2078
2080 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2081 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2082 MachineSDNode *Store =
2083 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2084 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2085 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2086
2087 ReplaceNode(Node, Store);
2088 return;
2089 }
2090 case Intrinsic::riscv_sf_vc_x_se:
2091 case Intrinsic::riscv_sf_vc_i_se:
2092 selectSF_VC_X_SE(Node);
2093 return;
2094 }
2095 break;
2096 }
2097 case ISD::BITCAST: {
2098 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2099 // Just drop bitcasts between vectors if both are fixed or both are
2100 // scalable.
2101 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2102 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2103 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2104 CurDAG->RemoveDeadNode(Node);
2105 return;
2106 }
2107 break;
2108 }
2109 case ISD::INSERT_SUBVECTOR: {
2110 SDValue V = Node->getOperand(0);
2111 SDValue SubV = Node->getOperand(1);
2112 SDLoc DL(SubV);
2113 auto Idx = Node->getConstantOperandVal(2);
2114 MVT SubVecVT = SubV.getSimpleValueType();
2115
2116 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2117 MVT SubVecContainerVT = SubVecVT;
2118 // Establish the correct scalable-vector types for any fixed-length type.
2119 if (SubVecVT.isFixedLengthVector()) {
2120 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2122 [[maybe_unused]] bool ExactlyVecRegSized =
2123 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2124 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2125 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2126 .getKnownMinValue()));
2127 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2128 }
2129 MVT ContainerVT = VT;
2130 if (VT.isFixedLengthVector())
2131 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2132
2133 const auto *TRI = Subtarget->getRegisterInfo();
2134 unsigned SubRegIdx;
2135 std::tie(SubRegIdx, Idx) =
2137 ContainerVT, SubVecContainerVT, Idx, TRI);
2138
2139 // If the Idx hasn't been completely eliminated then this is a subvector
2140 // insert which doesn't naturally align to a vector register. These must
2141 // be handled using instructions to manipulate the vector registers.
2142 if (Idx != 0)
2143 break;
2144
2145 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2146 [[maybe_unused]] bool IsSubVecPartReg =
2147 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2148 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2149 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2150 assert((!IsSubVecPartReg || V.isUndef()) &&
2151 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2152 "the subvector is smaller than a full-sized register");
2153
2154 // If we haven't set a SubRegIdx, then we must be going between
2155 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2156 if (SubRegIdx == RISCV::NoSubRegister) {
2157 unsigned InRegClassID =
2160 InRegClassID &&
2161 "Unexpected subvector extraction");
2162 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2163 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2164 DL, VT, SubV, RC);
2165 ReplaceNode(Node, NewNode);
2166 return;
2167 }
2168
2169 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2170 ReplaceNode(Node, Insert.getNode());
2171 return;
2172 }
2174 SDValue V = Node->getOperand(0);
2175 auto Idx = Node->getConstantOperandVal(1);
2176 MVT InVT = V.getSimpleValueType();
2177 SDLoc DL(V);
2178
2179 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2180 MVT SubVecContainerVT = VT;
2181 // Establish the correct scalable-vector types for any fixed-length type.
2182 if (VT.isFixedLengthVector()) {
2183 assert(Idx == 0);
2184 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2185 }
2186 if (InVT.isFixedLengthVector())
2187 InVT = TLI.getContainerForFixedLengthVector(InVT);
2188
2189 const auto *TRI = Subtarget->getRegisterInfo();
2190 unsigned SubRegIdx;
2191 std::tie(SubRegIdx, Idx) =
2193 InVT, SubVecContainerVT, Idx, TRI);
2194
2195 // If the Idx hasn't been completely eliminated then this is a subvector
2196 // extract which doesn't naturally align to a vector register. These must
2197 // be handled using instructions to manipulate the vector registers.
2198 if (Idx != 0)
2199 break;
2200
2201 // If we haven't set a SubRegIdx, then we must be going between
2202 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2203 if (SubRegIdx == RISCV::NoSubRegister) {
2204 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2206 InRegClassID &&
2207 "Unexpected subvector extraction");
2208 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2209 SDNode *NewNode =
2210 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2211 ReplaceNode(Node, NewNode);
2212 return;
2213 }
2214
2215 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2216 ReplaceNode(Node, Extract.getNode());
2217 return;
2218 }
2222 case RISCVISD::VFMV_V_F_VL: {
2223 // Try to match splat of a scalar load to a strided load with stride of x0.
2224 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2225 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2226 if (!Node->getOperand(0).isUndef())
2227 break;
2228 SDValue Src = Node->getOperand(1);
2229 auto *Ld = dyn_cast<LoadSDNode>(Src);
2230 // Can't fold load update node because the second
2231 // output is used so that load update node can't be removed.
2232 if (!Ld || Ld->isIndexed())
2233 break;
2234 EVT MemVT = Ld->getMemoryVT();
2235 // The memory VT should be the same size as the element type.
2236 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2237 break;
2238 if (!IsProfitableToFold(Src, Node, Node) ||
2239 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2240 break;
2241
2242 SDValue VL;
2243 if (IsScalarMove) {
2244 // We could deal with more VL if we update the VSETVLI insert pass to
2245 // avoid introducing more VSETVLI.
2246 if (!isOneConstant(Node->getOperand(2)))
2247 break;
2248 selectVLOp(Node->getOperand(2), VL);
2249 } else
2250 selectVLOp(Node->getOperand(2), VL);
2251
2252 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2253 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2254
2255 // If VL=1, then we don't need to do a strided load and can just do a
2256 // regular load.
2257 bool IsStrided = !isOneConstant(VL);
2258
2259 // Only do a strided load if we have optimized zero-stride vector load.
2260 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2261 break;
2262
2264 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2265 Ld->getBasePtr()};
2266 if (IsStrided)
2267 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2269 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2270 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2271
2273 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2274 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2275 Log2SEW, static_cast<unsigned>(LMUL));
2276 MachineSDNode *Load =
2277 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2278 // Update the chain.
2279 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2280 // Record the mem-refs
2281 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2282 // Replace the splat with the vlse.
2283 ReplaceNode(Node, Load);
2284 return;
2285 }
2286 case ISD::PREFETCH:
2287 unsigned Locality = Node->getConstantOperandVal(3);
2288 if (Locality > 2)
2289 break;
2290
2291 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2292 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2294
2295 int NontemporalLevel = 0;
2296 switch (Locality) {
2297 case 0:
2298 NontemporalLevel = 3; // NTL.ALL
2299 break;
2300 case 1:
2301 NontemporalLevel = 1; // NTL.PALL
2302 break;
2303 case 2:
2304 NontemporalLevel = 0; // NTL.P1
2305 break;
2306 default:
2307 llvm_unreachable("unexpected locality value.");
2308 }
2309
2310 if (NontemporalLevel & 0b1)
2312 if (NontemporalLevel & 0b10)
2314 }
2315 break;
2316 }
2317
2318 // Select the default instruction.
2319 SelectCode(Node);
2320}
2321
// Handles an inline-asm memory operand: appends a (base, immediate offset)
// operand pair for Op to OutOps, choosing how based on ConstraintID.
// Both visible handled paths return false after pushing two operands; an
// unrecognized constraint is reported through report_fatal_error.
// NOTE(review): the embedded listing numbers skip 2322, 2328-2329 and 2337, so
// the function header and the case labels are not visible in this view -
// confirm against the full file which constraints select each path.
2323 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2324 std::vector<SDValue> &OutOps) {
2325 // Always produce a register and immediate operand, as expected by
2326 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2327 switch (ConstraintID) {
// First visible path: let SelectAddrRegImm split Op into base + offset.
2330 SDValue Op0, Op1;
2331 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2332 assert(Found && "SelectAddrRegImm should always succeed");
2333 OutOps.push_back(Op0);
2334 OutOps.push_back(Op1);
2335 return false;
2336 }
// Second visible path: use Op itself as the base with a zero XLenVT offset.
2338 OutOps.push_back(Op);
2339 OutOps.push_back(
2340 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2341 return false;
2342 default:
2343 report_fatal_error("Unexpected asm memory constraint " +
2344 InlineAsm::getMemConstraintName(ConstraintID));
2345 }
2346
// Only reached if the switch falls through without returning.
2347 return true;
2348}
2349
// Matches an address that is directly a frame index. On a match, Base becomes
// the corresponding target frame index and Offset a zero constant, both of
// XLenVT, and the function returns true; otherwise it returns false and
// leaves Base/Offset untouched.
// NOTE(review): the first line of the signature (listing number 2350) is not
// visible in this view.
2351 SDValue &Offset) {
2352 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2353 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2354 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2355 return true;
2356 }
2357
2358 return false;
2359}
2360
2361// Select a frame index and an optional immediate offset from an ADD or OR.
// Returns true when a frame-index based address was matched and false
// otherwise. In the visible path the constant operand must fit in a signed
// 12-bit immediate (isInt<12>) for the match to succeed.
// NOTE(review): listing numbers 2362, 2364, 2367 and 2375 are missing from this
// view, so the signature, the conditions guarding the two early returns and
// the start of the Offset assignment cannot be seen - confirm in the full file.
2363 SDValue &Offset) {
2365 return true;
2366
2368 return false;
2369
// Operand 0 must be a frame index; operand 1 is read as a constant here, so
// the hidden guard above presumably ensures it is one - TODO confirm.
2370 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2371 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2372 if (isInt<12>(CVal)) {
2373 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2374 Subtarget->getXLenVT());
2376 Subtarget->getXLenVT());
2377 return true;
2378 }
2379 }
2380
2381 return false;
2382}
2383
2384// Fold constant addresses.
// Tries to express a constant address as Base plus a 12-bit Offset.
// Returns false when Addr is not a ConstantSDNode or when no suitable split is
// found; on success writes Base and Offset and returns true.
// When IsPrefetch is set, the offset is additionally required to have its low
// five bits clear, otherwise the fold is rejected.
// NOTE(review): the parameter line declaring Addr/Base/Offset (listing number
// 2387) is not visible in this view; the body reads Addr and writes Base and
// Offset.
2385static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2386 const MVT VT, const RISCVSubtarget *Subtarget,
2388 bool IsPrefetch = false) {
2389 if (!isa<ConstantSDNode>(Addr))
2390 return false;
2391
2392 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2393
2394 // If the constant is a simm12, we can fold the whole constant and use X0 as
2395 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2396 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
// Lo12 is the sign-extended low 12 bits; Hi is what remains once Lo12 is
// subtracted (computed in unsigned arithmetic).
2397 int64_t Lo12 = SignExtend64<12>(CVal);
2398 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2399 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2400 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2401 return false;
2402
2403 if (Hi) {
// Bits [31:12] of Hi become the LUI immediate.
2404 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2405 Base = SDValue(
2406 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2407 CurDAG->getTargetConstant(Hi20, DL, VT)),
2408 0);
2409 } else {
2410 Base = CurDAG->getRegister(RISCV::X0, VT);
2411 }
2412 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2413 return true;
2414 }
2415
2416 // Ask how constant materialization would handle this constant.
2417 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2418
2419 // If the last instruction would be an ADDI, we can fold its immediate and
2420 // emit the rest of the sequence as the base.
2421 if (Seq.back().getOpcode() != RISCV::ADDI)
2422 return false;
2423 Lo12 = Seq.back().getImm();
2424 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2425 return false;
2426
2427 // Drop the last instruction.
2428 Seq.pop_back();
2429 assert(!Seq.empty() && "Expected more instructions in sequence");
2430
// The remaining instructions form the base; the dropped ADDI immediate is
// the offset.
2431 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2432 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2433 return true;
2434}
2435
2436// Is this ADD instruction only used as the base pointer of scalar loads and
2437// stores?
// Walks every user of Add and returns false as soon as one of them is not a
// LOAD/STORE/ATOMIC_LOAD/ATOMIC_STORE, accesses a memory type other than a
// scalar integer, f16, f32 or f64, or stores Add itself as the value.
// Returns true when all users pass (including when there are no users).
// NOTE(review): the function header (listing number 2438) is not visible in
// this view.
2439 for (auto *Use : Add->uses()) {
2440 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2441 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2442 Use->getOpcode() != ISD::ATOMIC_STORE)
2443 return false;
// Only scalar integer and f16/f32/f64 memory types are accepted.
2444 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2445 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2446 VT != MVT::f64)
2447 return false;
2448 // Don't allow stores of the value. It must be used as the address.
2449 if (Use->getOpcode() == ISD::STORE &&
2450 cast<StoreSDNode>(Use)->getValue() == Add)
2451 return false;
2452 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2453 cast<AtomicSDNode>(Use)->getVal() == Add)
2454 return false;
2455 }
2456
2457 return true;
2458}
2459
// Splits Addr into Base + (Index << Scale), where Scale is a target constant
// no larger than MaxShiftAmount. Returns true when a decomposition was
// produced and false otherwise.
// NOTE(review): listing numbers 2460 and 2462 are missing from this view, so
// the function name and the declarations of Addr/Base/Index are not visible.
2461 unsigned MaxShiftAmount,
2463 SDValue &Scale) {
2464 EVT VT = Addr.getSimpleValueType();
// UnwrapShl: if N is a SHL by a constant no larger than MaxShiftAmount, set
// Index to the shifted value and Shift to the amount; otherwise Index is N
// and Shift is 0. Returns true only when a non-zero shift was peeled off.
2465 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2466 SDValue &Shift) {
2467 uint64_t ShiftAmt = 0;
2468 Index = N;
2469
2470 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2471 // Only match shifts by a value in range [0, MaxShiftAmount].
2472 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2473 Index = N.getOperand(0);
2474 ShiftAmt = N.getConstantOperandVal(1);
2475 }
2476 }
2477
2478 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2479 return ShiftAmt != 0;
2480 };
2481
2482 if (Addr.getOpcode() == ISD::ADD) {
// ADD with a constant right operand: only the nested-add pattern below is
// accepted; any other shape falls through to the final `return false`.
2483 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2484 SDValue AddrB = Addr.getOperand(0);
2485 if (AddrB.getOpcode() == ISD::ADD &&
2486 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2487 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2488 isInt<12>(C1->getSExtValue())) {
2489 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2490 SDValue C1Val =
2491 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2492 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2493 AddrB.getOperand(1), C1Val),
2494 0);
2495 return true;
2496 }
// Shifted index on the left: the right operand is the base.
2497 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2498 Base = Addr.getOperand(1);
2499 return true;
2500 } else {
// Otherwise take the right operand as the index (Scale may end up 0) and
// the left operand as the base.
2501 UnwrapShl(Addr.getOperand(1), Index, Scale);
2502 Base = Addr.getOperand(0);
2503 return true;
2504 }
// A bare shift with no add: use X0 as the base.
2505 } else if (UnwrapShl(Addr, Index, Scale)) {
// This VT shadows the function-level VT declared above.
2506 EVT VT = Addr.getValueType();
2507 Base = CurDAG->getRegister(RISCV::X0, VT);
2508 return true;
2509 }
2510
2511 return false;
2512}
2513
// Splits Addr into a Base register operand and an Offset operand.
// Every visible path returns true; the final fallback uses Addr itself as the
// base with a zero offset.
// IsINX tightens the simm12 check: the offset must still fit in 12 signed bits
// after adding 4 (RV32ZdinxRange) - presumably because a second access at
// offset + 4 is needed in that mode; TODO confirm.
// NOTE(review): listing numbers 2514, 2516, 2529 and 2547 are missing from this
// view, so the signature start, the condition guarding the first early return,
// the condition opening the simm12 block, and the start of the Offset
// assignment for the global-address case cannot be seen.
2515 SDValue &Offset, bool IsINX) {
2517 return true;
2518
2519 SDLoc DL(Addr);
2520 MVT VT = Addr.getSimpleValueType();
2521
// ADD_LO already has the (base, offset) shape: use its operands directly.
2522 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2523 Base = Addr.getOperand(0);
2524 Offset = Addr.getOperand(1);
2525 return true;
2526 }
2527
2528 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2530 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2531 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2532 Base = Addr.getOperand(0);
2533 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2534 SDValue LoOperand = Base.getOperand(1);
2535 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2536 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2537 // (its low part, really), then we can rely on the alignment of that
2538 // variable to provide a margin of safety before low part can overflow
2539 // the 12 bits of the load/store offset. Check if CVal falls within
2540 // that margin; if so (low part + CVal) can't overflow.
// This DataLayout DL shadows the SDLoc DL declared at function scope.
2541 const DataLayout &DL = CurDAG->getDataLayout();
2542 Align Alignment = commonAlignment(
2543 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2544 if (CVal == 0 || Alignment > CVal) {
2545 int64_t CombinedOffset = CVal + GA->getOffset();
2546 Base = Base.getOperand(0);
2548 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2549 CombinedOffset, GA->getTargetFlags());
2550 return true;
2551 }
2552 }
2553 }
2554
// Plain base + simm12: convert a frame-index base to a target frame index.
2555 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2556 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2557 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2558 return true;
2559 }
2560 }
2561
2562 // Handle ADD with large immediates.
2563 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2564 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2565 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2566 "simm12 not already handled?");
2567
2568 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2569 // an ADDI for part of the offset and fold the rest into the load/store.
2570 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2571 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
// The ADDI takes the extreme simm12 value; the remainder is the offset.
2572 int64_t Adj = CVal < 0 ? -2048 : 2047;
2573 Base = SDValue(
2574 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2575 CurDAG->getTargetConstant(Adj, DL, VT)),
2576 0);
2577 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2578 return true;
2579 }
2580
2581 // For larger immediates, we might be able to save one instruction from
2582 // constant materialization by folding the Lo12 bits of the immediate into
2583 // the address. We should only do this if the ADD is only used by loads and
2584 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2585 // separately with the full materialized immediate creating extra
2586 // instructions.
2587 if (isWorthFoldingAdd(Addr) &&
2588 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2589 Offset)) {
2590 // Insert an ADD instruction with the materialized Hi52 bits.
2591 Base = SDValue(
2592 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2593 0);
2594 return true;
2595 }
2596 }
2597
// The whole address may itself be a foldable constant.
2598 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2599 return true;
2600
// Fallback: use the address unchanged with a zero offset.
2601 Base = Addr;
2602 Offset = CurDAG->getTargetConstant(0, DL, VT);
2603 return true;
2604}
2605
2606/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2607/// Offset should be all zeros.
/// Every visible path returns true; when no offset with clear low five bits
/// can be folded, the fallback uses Addr as the base with a zero offset.
// NOTE(review): listing numbers 2608, 2610, 2616 and 2646 are missing from this
// view, so the signature start, the condition guarding the first early return,
// the condition opening the simm12 block, and the start of the Base assignment
// in the adjustment case cannot be seen.
2609 SDValue &Offset) {
2611 return true;
2612
2613 SDLoc DL(Addr);
2614 MVT VT = Addr.getSimpleValueType();
2615
2617 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2618 if (isInt<12>(CVal)) {
2619 Base = Addr.getOperand(0);
2620
2621 // Early-out if not a valid offset.
// The offset cannot be folded, so the whole address becomes the base.
2622 if ((CVal & 0b11111) != 0) {
2623 Base = Addr;
2624 Offset = CurDAG->getTargetConstant(0, DL, VT);
2625 return true;
2626 }
2627
2628 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2629 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2630 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2631 return true;
2632 }
2633 }
2634
2635 // Handle ADD with large immediates.
2636 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2637 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
// NOTE(review): both operands of the && below are the same expression.
2638 assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2639 "simm12 not already handled?");
2640
2641 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2642 // one instruction by folding adjustment (-2048 or 2016) into the address.
// NOTE(review): with Adj = 2016, CVal values 4064 and 4065 give an
// AdjustedOffset of 2048 and 2049, which does not satisfy isInt<12>. If the
// ADDI immediate is a simm12 (as the isInt<12> checks elsewhere in this file
// suggest), the upper bound here looks too large by two - confirm.
2643 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2644 int64_t Adj = CVal < 0 ? -2048 : 2016;
2645 int64_t AdjustedOffset = CVal - Adj;
2647 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2648 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2649 0);
2650 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2651 return true;
2652 }
2653
// Passing true for IsPrefetch makes selectConstantAddr reject offsets whose
// low five bits are not zero.
2654 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2655 Offset, true)) {
2656 // Insert an ADD instruction with the materialized Hi52 bits.
2657 Base = SDValue(
2658 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2659 0);
2660 return true;
2661 }
2662 }
2663
2664 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2665 return true;
2666
// Fallback: use the address unchanged with a zero offset.
2667 Base = Addr;
2668 Offset = CurDAG->getTargetConstant(0, DL, VT);
2669 return true;
2670}
2671
// Simplifies a shift-amount operand N for a shift that only reads
// log2(ShiftWidth) bits, writing the operand to use into ShAmt.
// It looks through a zero-extend, drops an AND mask that leaves the relevant
// bits intact, and rewrites X+N / N-X forms whose constant is redundant modulo
// ShiftWidth. Every path returns true; when nothing can be simplified ShAmt is
// the (possibly partially unwrapped) original operand.
// NOTE(review): the first line of the signature (listing number 2672) is not
// visible in this view.
2673 SDValue &ShAmt) {
2674 ShAmt = N;
2675
2676 // Peek through zext.
2677 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2678 ShAmt = ShAmt.getOperand(0);
2679
2680 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2681 // amount. If there is an AND on the shift amount, we can bypass it if it
2682 // doesn't affect any of those bits.
2683 if (ShAmt.getOpcode() == ISD::AND &&
2684 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2685 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2686
2687 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2688 // mask that covers the bits needed to represent all shift amounts.
2689 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2690 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2691
2692 if (ShMask.isSubsetOf(AndMask)) {
2693 ShAmt = ShAmt.getOperand(0);
2694 } else {
2695 // SimplifyDemandedBits may have optimized the mask so try restoring any
2696 // bits that are known zero.
2697 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
// The AND really changes the shift amount: keep it and stop here.
2698 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2699 return true;
2700 ShAmt = ShAmt.getOperand(0);
2701 }
2702 }
2703
2704 if (ShAmt.getOpcode() == ISD::ADD &&
2705 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2706 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2707 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2708 // to avoid the ADD.
2709 if (Imm != 0 && Imm % ShiftWidth == 0) {
2710 ShAmt = ShAmt.getOperand(0);
2711 return true;
2712 }
2713 } else if (ShAmt.getOpcode() == ISD::SUB &&
2714 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2715 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2716 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2717 // generate a NEG instead of a SUB of a constant.
2718 if (Imm != 0 && Imm % ShiftWidth == 0) {
2719 SDLoc DL(ShAmt);
2720 EVT VT = ShAmt.getValueType();
// The negation is emitted as a subtraction from X0; SUBW is chosen when
// the shift amount type is i64, SUB otherwise.
2721 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2722 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2723 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2724 ShAmt.getOperand(1));
2725 ShAmt = SDValue(Neg, 0);
2726 return true;
2727 }
2728 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2729 // to generate a NOT instead of a SUB of a constant.
2730 if (Imm % ShiftWidth == ShiftWidth - 1) {
2731 SDLoc DL(ShAmt);
2732 EVT VT = ShAmt.getValueType();
// The NOT is emitted as XORI with -1.
2733 MachineSDNode *Not =
2734 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2735 CurDAG->getTargetConstant(-1, DL, VT));
2736 ShAmt = SDValue(Not, 0);
2737 return true;
2738 }
2739 }
2740
2741 return true;
2742}
2743
2744/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2745/// check for equality with 0. This function emits instructions that convert the
2746/// seteq/setne into something that can be compared with 0.
2747/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2748/// ISD::SETNE).
/// Returns false when N is not a SETCC, its condition code differs from
/// \p ExpectedCCVal, or its left operand is not a scalar integer. Otherwise
/// \p Val receives a value that is zero exactly when the two operands are
/// equal, and the function returns true.
// NOTE(review): listing numbers 2749, 2783 and 2793 are missing from this view,
// so the signature start and the opening of the two `Val =` expressions for
// the XORI and ADDI cases cannot be seen.
2750 SDValue &Val) {
2751 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2752 "Unexpected condition code!");
2753
2754 // We're looking for a setcc.
2755 if (N->getOpcode() != ISD::SETCC)
2756 return false;
2757
2758 // Must be an equality comparison.
2759 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2760 if (CCVal != ExpectedCCVal)
2761 return false;
2762
2763 SDValue LHS = N->getOperand(0);
2764 SDValue RHS = N->getOperand(1);
2765
2766 if (!LHS.getValueType().isScalarInteger())
2767 return false;
2768
2769 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2770 if (isNullConstant(RHS)) {
2771 Val = LHS;
2772 return true;
2773 }
2774
2775 SDLoc DL(N);
2776
2777 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2778 int64_t CVal = C->getSExtValue();
2779 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2780 // non-zero otherwise.
2781 if (CVal == -2048) {
2782 Val =
2784 RISCV::XORI, DL, N->getValueType(0), LHS,
2785 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2786 0);
2787 return true;
2788 }
2789 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2790 // LHS is equal to the RHS and non-zero otherwise.
// -2048 was handled above, so isInt<12> here covers [-2047, 2047]; 2048 is
// added explicitly because the ADDI immediate is the negated value.
2791 if (isInt<12>(CVal) || CVal == 2048) {
2792 Val =
2794 RISCV::ADDI, DL, N->getValueType(0), LHS,
2795 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2796 0);
2797 return true;
2798 }
2799 }
2800
2801 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2802 // equal and a non-zero value if they aren't.
2803 Val = SDValue(
2804 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2805 return true;
2806}
2807
// Checks whether N is already sign-extended from its low `Bits` bits.
// On success Val receives the value to use (with a redundant extension
// stripped where one is recognized) and the function returns true; otherwise
// it returns false.
// NOTE(review): the function header (listing number 2808) is not visible in
// this view.
// Case 1: an explicit SIGN_EXTEND_INREG from exactly `Bits` bits - use its
// input directly.
2809 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2810 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2811 Val = N.getOperand(0);
2812 return true;
2813 }
2814
// UnwrapShlSra: if N is (sra (shl X, ShiftAmt), ShiftAmt) with both amounts
// constant, return X; otherwise return N unchanged.
2815 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2816 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2817 return N;
2818
2819 SDValue N0 = N.getOperand(0);
2820 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2821 N.getConstantOperandVal(1) == ShiftAmt &&
2822 N0.getConstantOperandVal(1) == ShiftAmt)
2823 return N0.getOperand(0);
2824
2825 return N;
2826 };
2827
// Case 2: the DAG can prove that more than (width - Bits) top bits are copies
// of the sign bit.
2828 MVT VT = N.getSimpleValueType();
2829 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2830 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2831 return true;
2832 }
2833
2834 return false;
2835}
2836
2838 if (N.getOpcode() == ISD::AND) {
2839 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2840 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2841 Val = N.getOperand(0);
2842 return true;
2843 }
2844 }
2845 MVT VT = N.getSimpleValueType();
2846 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2847 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2848 Val = N;
2849 return true;
2850 }
2851
2852 return false;
2853}
2854
2855/// Look for various patterns that can be done with a SHL that can be folded
2856/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2857/// SHXADD we are trying to match.
2859 SDValue &Val) {
2860 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2861 SDValue N0 = N.getOperand(0);
2862
2863 bool LeftShift = N0.getOpcode() == ISD::SHL;
2864 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2865 isa<ConstantSDNode>(N0.getOperand(1))) {
2866 uint64_t Mask = N.getConstantOperandVal(1);
2867 unsigned C2 = N0.getConstantOperandVal(1);
2868
2869 unsigned XLen = Subtarget->getXLen();
2870 if (LeftShift)
2871 Mask &= maskTrailingZeros<uint64_t>(C2);
2872 else
2873 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2874
2875 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2876 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2877 // followed by a SHXADD with c3 for the X amount.
2878 if (isShiftedMask_64(Mask)) {
2879 unsigned Leading = XLen - llvm::bit_width(Mask);
2880 unsigned Trailing = llvm::countr_zero(Mask);
2881 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2882 SDLoc DL(N);
2883 EVT VT = N.getValueType();
2885 RISCV::SRLI, DL, VT, N0.getOperand(0),
2886 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2887 0);
2888 return true;
2889 }
2890 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2891 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2892 // followed by a SHXADD using c3 for the X amount.
2893 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2894 SDLoc DL(N);
2895 EVT VT = N.getValueType();
2896 Val = SDValue(
2898 RISCV::SRLI, DL, VT, N0.getOperand(0),
2899 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2900 0);
2901 return true;
2902 }
2903 }
2904 }
2905 }
2906
2907 bool LeftShift = N.getOpcode() == ISD::SHL;
2908 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2909 isa<ConstantSDNode>(N.getOperand(1))) {
2910 SDValue N0 = N.getOperand(0);
2911 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2912 isa<ConstantSDNode>(N0.getOperand(1))) {
2913 uint64_t Mask = N0.getConstantOperandVal(1);
2914 if (isShiftedMask_64(Mask)) {
2915 unsigned C1 = N.getConstantOperandVal(1);
2916 unsigned XLen = Subtarget->getXLen();
2917 unsigned Leading = XLen - llvm::bit_width(Mask);
2918 unsigned Trailing = llvm::countr_zero(Mask);
2919 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2920 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2921 if (LeftShift && Leading == 32 && Trailing > 0 &&
2922 (Trailing + C1) == ShAmt) {
2923 SDLoc DL(N);
2924 EVT VT = N.getValueType();
2926 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2927 CurDAG->getTargetConstant(Trailing, DL, VT)),
2928 0);
2929 return true;
2930 }
2931 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2932 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2933 if (!LeftShift && Leading == 32 && Trailing > C1 &&
2934 (Trailing - C1) == ShAmt) {
2935 SDLoc DL(N);
2936 EVT VT = N.getValueType();
2938 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2939 CurDAG->getTargetConstant(Trailing, DL, VT)),
2940 0);
2941 return true;
2942 }
2943 }
2944 }
2945 }
2946
2947 return false;
2948}
2949
2950/// Look for various patterns that can be done with a SHL that can be folded
2951/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2952/// SHXADD_UW we are trying to match.
2954 SDValue &Val) {
2955 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2956 N.hasOneUse()) {
2957 SDValue N0 = N.getOperand(0);
2958 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2959 N0.hasOneUse()) {
2960 uint64_t Mask = N.getConstantOperandVal(1);
2961 unsigned C2 = N0.getConstantOperandVal(1);
2962
2963 Mask &= maskTrailingZeros<uint64_t>(C2);
2964
2965 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2966 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2967 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2968 if (isShiftedMask_64(Mask)) {
2969 unsigned Leading = llvm::countl_zero(Mask);
2970 unsigned Trailing = llvm::countr_zero(Mask);
2971 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2972 SDLoc DL(N);
2973 EVT VT = N.getValueType();
2975 RISCV::SLLI, DL, VT, N0.getOperand(0),
2976 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2977 0);
2978 return true;
2979 }
2980 }
2981 }
2982 }
2983
2984 return false;
2985}
2986
// Return true if the machine node \p User, which consumes the value of
// interest as operand number \p UserOpNo, only reads the low \p Bits bits of
// that operand.
//
// Returns false when:
//  - RISCV::getRVVMCOpcode yields 0 for User's opcode,
//  - the pseudo has no SEW operand,
//  - the value is used as the VL operand, or
//  - RISCV::getVectorLowDemandedScalarBits has no answer for the opcode/SEW.
2987static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
2988 unsigned Bits,
2989 const TargetInstrInfo *TII) {
2990 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
2991
2992 if (!MCOpcode)
2993 return false;
2994
2995 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
2996 const uint64_t TSFlags = MCID.TSFlags;
2997 if (!RISCVII::hasSEWOp(TSFlags))
2998 return false;
2999 assert(RISCVII::hasVLOp(TSFlags));
3000
 // Locate the VL operand by counting back from the end of the operand list.
 // The trailing operands are, in order: VL, SEW, an optional policy operand,
 // an optional chain, and an optional glue.
3001 bool HasGlueOp = User->getGluedNode() != nullptr;
3002 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3003 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3004 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3005 unsigned VLIdx =
3006 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
 // The SEW operand immediately follows VL and is stored as log2(SEW).
3007 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3008
 // A use as the VL operand is never treated as an N-bit use.
3009 if (UserOpNo == VLIdx)
3010 return false;
3011
 // Otherwise defer to the per-opcode table; an empty result means "unknown"
 // and is handled conservatively.
3012 auto NumDemandedBits =
3013 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3014 return NumDemandedBits && Bits >= *NumDemandedBits;
3015}
3016
3017// Return true if all users of this SDNode* only consume the lower \p Bits.
3018// This can be used to form W instructions for add/sub/mul/shl even when the
3019// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3020// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3021// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3022// the add/sub/mul/shl to become non-W instructions. By checking the users we
3023// may be able to use a W instruction and CSE with the other instruction if
3024// this has happened. We could try to detect that the CSE opportunity exists
3025// before doing this, but that would be more complicated.
3027 const unsigned Depth) const {
3028 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3029 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3030 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3031 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3032 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3033 isa<ConstantSDNode>(Node) || Depth != 0) &&
3034 "Unexpected opcode");
3035
3037 return false;
3038
3039 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3040 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3041 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3042 return false;
3043
3044 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3045 SDNode *User = *UI;
3046 // Users of this node should have already been instruction selected
3047 if (!User->isMachineOpcode())
3048 return false;
3049
3050 // TODO: Add more opcodes?
3051 switch (User->getMachineOpcode()) {
3052 default:
3053 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3054 break;
3055 return false;
3056 case RISCV::ADDW:
3057 case RISCV::ADDIW:
3058 case RISCV::SUBW:
3059 case RISCV::MULW:
3060 case RISCV::SLLW:
3061 case RISCV::SLLIW:
3062 case RISCV::SRAW:
3063 case RISCV::SRAIW:
3064 case RISCV::SRLW:
3065 case RISCV::SRLIW:
3066 case RISCV::DIVW:
3067 case RISCV::DIVUW:
3068 case RISCV::REMW:
3069 case RISCV::REMUW:
3070 case RISCV::ROLW:
3071 case RISCV::RORW:
3072 case RISCV::RORIW:
3073 case RISCV::CLZW:
3074 case RISCV::CTZW:
3075 case RISCV::CPOPW:
3076 case RISCV::SLLI_UW:
3077 case RISCV::FMV_W_X:
3078 case RISCV::FCVT_H_W:
3079 case RISCV::FCVT_H_WU:
3080 case RISCV::FCVT_S_W:
3081 case RISCV::FCVT_S_WU:
3082 case RISCV::FCVT_D_W:
3083 case RISCV::FCVT_D_WU:
3084 case RISCV::TH_REVW:
3085 case RISCV::TH_SRRIW:
3086 if (Bits < 32)
3087 return false;
3088 break;
3089 case RISCV::SLL:
3090 case RISCV::SRA:
3091 case RISCV::SRL:
3092 case RISCV::ROL:
3093 case RISCV::ROR:
3094 case RISCV::BSET:
3095 case RISCV::BCLR:
3096 case RISCV::BINV:
3097 // Shift amount operands only use log2(Xlen) bits.
3098 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
3099 return false;
3100 break;
3101 case RISCV::SLLI:
3102 // SLLI only uses the lower (XLen - ShAmt) bits.
3103 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
3104 return false;
3105 break;
3106 case RISCV::ANDI:
3107 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3108 break;
3109 goto RecCheck;
3110 case RISCV::ORI: {
3111 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3112 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3113 break;
3114 [[fallthrough]];
3115 }
3116 case RISCV::AND:
3117 case RISCV::OR:
3118 case RISCV::XOR:
3119 case RISCV::XORI:
3120 case RISCV::ANDN:
3121 case RISCV::ORN:
3122 case RISCV::XNOR:
3123 case RISCV::SH1ADD:
3124 case RISCV::SH2ADD:
3125 case RISCV::SH3ADD:
3126 RecCheck:
3127 if (hasAllNBitUsers(User, Bits, Depth + 1))
3128 break;
3129 return false;
3130 case RISCV::SRLI: {
3131 unsigned ShAmt = User->getConstantOperandVal(1);
3132 // If we are shifting right by less than Bits, and users don't demand any
3133 // bits that were shifted into [Bits-1:0], then we can consider this as an
3134 // N-Bit user.
3135 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3136 break;
3137 return false;
3138 }
3139 case RISCV::SEXT_B:
3140 case RISCV::PACKH:
3141 if (Bits < 8)
3142 return false;
3143 break;
3144 case RISCV::SEXT_H:
3145 case RISCV::FMV_H_X:
3146 case RISCV::ZEXT_H_RV32:
3147 case RISCV::ZEXT_H_RV64:
3148 case RISCV::PACKW:
3149 if (Bits < 16)
3150 return false;
3151 break;
3152 case RISCV::PACK:
3153 if (Bits < (Subtarget->getXLen() / 2))
3154 return false;
3155 break;
3156 case RISCV::ADD_UW:
3157 case RISCV::SH1ADD_UW:
3158 case RISCV::SH2ADD_UW:
3159 case RISCV::SH3ADD_UW:
3160 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3161 // 32 bits.
3162 if (UI.getOperandNo() != 0 || Bits < 32)
3163 return false;
3164 break;
3165 case RISCV::SB:
3166 if (UI.getOperandNo() != 0 || Bits < 8)
3167 return false;
3168 break;
3169 case RISCV::SH:
3170 if (UI.getOperandNo() != 0 || Bits < 16)
3171 return false;
3172 break;
3173 case RISCV::SW:
3174 if (UI.getOperandNo() != 0 || Bits < 32)
3175 return false;
3176 break;
3177 }
3178 }
3179
3180 return true;
3181}
3182
3183// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3185 SDValue &Shl2) {
3186 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3187 int64_t Offset = C->getSExtValue();
3188 int64_t Shift;
3189 for (Shift = 0; Shift < 4; Shift++)
3190 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3191 break;
3192
3193 // Constant cannot be encoded.
3194 if (Shift == 4)
3195 return false;
3196
3197 EVT Ty = N->getValueType(0);
3198 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3199 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3200 return true;
3201 }
3202
3203 return false;
3204}
3205
3206// Select VL as a 5 bit immediate or a value that will become a register. This
3207// allows us to choose between VSETIVLI or VSETVLI later.
3209 auto *C = dyn_cast<ConstantSDNode>(N);
3210 if (C && isUInt<5>(C->getZExtValue())) {
3211 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3212 N->getValueType(0));
3213 } else if (C && C->isAllOnes()) {
3214 // Treat all ones as VLMax.
3216 N->getValueType(0));
3217 } else if (isa<RegisterSDNode>(N) &&
3218 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3219 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3220 // as the register class. Convert X0 to a special immediate to pass the
3221 // MachineVerifier. This is recognized specially by the vsetvli insertion
3222 // pass.
3224 N->getValueType(0));
3225 } else {
3226 VL = N;
3227 }
3228
3229 return true;
3230}
3231
3233 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3234 if (!N.getOperand(0).isUndef())
3235 return SDValue();
3236 N = N.getOperand(1);
3237 }
3238 SDValue Splat = N;
3239 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3240 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3241 !Splat.getOperand(0).isUndef())
3242 return SDValue();
3243 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3244 return Splat;
3245}
3246
3249 if (!Splat)
3250 return false;
3251
3252 SplatVal = Splat.getOperand(1);
3253 return true;
3254}
3255
3257 SelectionDAG &DAG,
3258 const RISCVSubtarget &Subtarget,
3259 std::function<bool(int64_t)> ValidateImm) {
3261 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3262 return false;
3263
3264 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3265 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3266 "Unexpected splat operand type");
3267
3268 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3269 // type is wider than the resulting vector element type: an implicit
3270 // truncation first takes place. Therefore, perform a manual
3271 // truncation/sign-extension in order to ignore any truncated bits and catch
3272 // any zero-extended immediate.
3273 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3274 // sign-extending to (XLenVT -1).
3275 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3276
3277 int64_t SplatImm = SplatConst.getSExtValue();
3278
3279 if (!ValidateImm(SplatImm))
3280 return false;
3281
3282 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3283 return true;
3284}
3285
3287 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3288 [](int64_t Imm) { return isInt<5>(Imm); });
3289}
3290
3292 return selectVSplatImmHelper(
3293 N, SplatVal, *CurDAG, *Subtarget,
3294 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3295}
3296
3298 SDValue &SplatVal) {
3299 return selectVSplatImmHelper(
3300 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3301 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3302 });
3303}
3304
3306 SDValue &SplatVal) {
3307 return selectVSplatImmHelper(
3308 N, SplatVal, *CurDAG, *Subtarget,
3309 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3310}
3311
3313 auto IsExtOrTrunc = [](SDValue N) {
3314 switch (N->getOpcode()) {
3315 case ISD::SIGN_EXTEND:
3316 case ISD::ZERO_EXTEND:
3317 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3318 // inactive elements will be undef.
3320 case RISCVISD::VSEXT_VL:
3321 case RISCVISD::VZEXT_VL:
3322 return true;
3323 default:
3324 return false;
3325 }
3326 };
3327
3328 // We can have multiple nested nodes, so unravel them all if needed.
3329 while (IsExtOrTrunc(N)) {
3330 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3331 return false;
3332 N = N->getOperand(0);
3333 }
3334
3335 return selectVSplat(N, SplatVal);
3336}
3337
3339 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3340 if (!CFP)
3341 return false;
3342 const APFloat &APF = CFP->getValueAPF();
3343 // td can handle +0.0 already.
3344 if (APF.isPosZero())
3345 return false;
3346
3347 MVT VT = CFP->getSimpleValueType(0);
3348
3349 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3350 // the returned pair is true) we still prefer FLI + FNEG over immediate
3351 // materialization as the latter might generate a longer instruction sequence.
3352 if (static_cast<const RISCVTargetLowering *>(TLI)
3353 ->getLegalZfaFPImm(APF, VT)
3354 .first >= 0)
3355 return false;
3356
3357 MVT XLenVT = Subtarget->getXLenVT();
3358 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3359 assert(APF.isNegZero() && "Unexpected constant.");
3360 return false;
3361 }
3362 SDLoc DL(N);
3363 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3364 *Subtarget);
3365 return true;
3366}
3367
3369 SDValue &Imm) {
3370 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3371 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3372
3373 if (!isInt<5>(ImmVal))
3374 return false;
3375
3376 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3377 return true;
3378 }
3379
3380 return false;
3381}
3382
3383// Try to remove sext.w if the input is a W instruction or can be made into
3384// a W instruction cheaply.
//
// \p N is the candidate sext.w node (ADDIW with a zero immediate). Returns
// true if the uses of \p N were replaced, false if nothing was changed.
3385bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3386 // Look for the sext.w pattern, addiw rd, rs1, 0.
3387 if (N->getMachineOpcode() != RISCV::ADDIW ||
3388 !isNullConstant(N->getOperand(1)))
3389 return false;
3390
 // Only already-selected (machine opcode) producers are considered.
3391 SDValue N0 = N->getOperand(0);
3392 if (!N0.isMachineOpcode())
3393 return false;
3394
3395 switch (N0.getMachineOpcode()) {
3396 default:
3397 break;
3398 case RISCV::ADD:
3399 case RISCV::ADDI:
3400 case RISCV::SUB:
3401 case RISCV::MUL:
3402 case RISCV::SLLI: {
3403 // Convert sext.w+add/addi/sub/mul/slli to the matching W instruction. This
3404 // creates a new, independent instruction, which improves latency.
3405 unsigned Opc;
3406 switch (N0.getMachineOpcode()) {
3407 default:
3408 llvm_unreachable("Unexpected opcode!");
3409 case RISCV::ADD: Opc = RISCV::ADDW; break;
3410 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3411 case RISCV::SUB: Opc = RISCV::SUBW; break;
3412 case RISCV::MUL: Opc = RISCV::MULW; break;
3413 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3414 }
3415
3416 SDValue N00 = N0.getOperand(0);
3417 SDValue N01 = N0.getOperand(1);
3418
3419 // Shift amount needs to be uimm5.
 // If it is not, leave the switch and fall through to `return false`.
3420 if (N0.getMachineOpcode() == RISCV::SLLI &&
3421 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3422 break;
3423
 // Build the W form from N0's operands and replace the sext.w (N), not N0,
 // with it.
3424 SDNode *Result =
3425 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3426 N00, N01);
3427 ReplaceUses(N, Result);
3428 return true;
3429 }
3430 case RISCV::ADDW:
3431 case RISCV::ADDIW:
3432 case RISCV::SUBW:
3433 case RISCV::MULW:
3434 case RISCV::SLLIW:
3435 case RISCV::PACKW:
3436 case RISCV::TH_MULAW:
3437 case RISCV::TH_MULAH:
3438 case RISCV::TH_MULSW:
3439 case RISCV::TH_MULSH:
 // Producers whose result type is i32 are left alone; breaking out of the
 // switch reaches the final `return false`.
3440 if (N0.getValueType() == MVT::i32)
3441 break;
3442
3443 // Result is already sign extended just remove the sext.w.
3444 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3445 ReplaceUses(N, N0.getNode());
3446 return true;
3447 }
3448
3449 return false;
3450}
3451
// Return true if \p MaskOp is the V0 register and \p GlueOp comes from a
// CopyToReg into V0 whose copied value is a PseudoVMSET_M_B* machine node
// (optionally wrapped in a single COPY_TO_REGCLASS). Any other shape, including
// a null \p GlueOp, yields false.
3452static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3453 // Check that we're using V0 as a mask register.
3454 if (!isa<RegisterSDNode>(MaskOp) ||
3455 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3456 return false;
3457
3458 // The glued user defines V0.
3459 const auto *Glued = GlueOp.getNode();
3460
3461 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3462 return false;
3463
3464 // Check that we're defining V0 as a mask register.
3465 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3466 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3467 return false;
3468
3469 // Check the instruction defining V0; it needs to be a VMSET pseudo.
 // Operand 2 of the CopyToReg is the value being copied into V0.
3470 SDValue MaskSetter = Glued->getOperand(2);
3471
3472 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3473 // from an extract_subvector or insert_subvector.
3474 if (MaskSetter->isMachineOpcode() &&
3475 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3476 MaskSetter = MaskSetter->getOperand(0);
3477
 // Matches any of the seven PseudoVMSET_M_B<n> variants (n = 1..64).
3478 const auto IsVMSet = [](unsigned Opc) {
3479 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3480 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3481 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3482 Opc == RISCV::PseudoVMSET_M_B8;
3483 };
3484
3485 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3486 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3487 // assume that it's all-ones? Same applies to its VL.
3488 return MaskSetter->isMachineOpcode() &&
3489 IsVMSet(MaskSetter.getMachineOpcode());
3490}
3491
3492// Return true if the mask of N can be proven to be an all-ones mask.
// The mask is read from operand \p MaskOpIdx and the glue from N's last
// operand; the check itself is done by the (SDValue, SDValue) overload.
3493static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3494 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3495 N->getOperand(N->getNumOperands() - 1));
3496}
3497
// Return true if \p V is an IMPLICIT_DEF machine node, or a REG_SEQUENCE
// machine node whose odd-indexed operands (1, 3, 5, ...) are all themselves
// implicit defs by this same definition. Non-machine nodes yield false.
3498static bool isImplicitDef(SDValue V) {
3499 if (!V.isMachineOpcode())
3500 return false;
3501 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
 // Only every second operand, starting at 1, is inspected.
 // NOTE(review): presumably these are the value operands and the skipped
 // ones are the register class / subregister indices - confirm.
3502 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3503 if (!isImplicitDef(V.getOperand(I)))
3504 return false;
3505 return true;
3506 }
3507 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3508}
3509
3510// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3511// corresponding "unmasked" pseudo versions. The mask we're interested in will
3512// take the form of a V0 physical register operand, with a glued
3513// register-setting instruction.
3514bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3516 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3517 if (!I)
3518 return false;
3519
3520 unsigned MaskOpIdx = I->MaskOpIdx;
3521 if (!usesAllOnesMask(N, MaskOpIdx))
3522 return false;
3523
3524 // There are two classes of pseudos in the table - compares and
3525 // everything else. See the comment on RISCVMaskedPseudo for details.
3526 const unsigned Opc = I->UnmaskedPseudo;
3527 const MCInstrDesc &MCID = TII->get(Opc);
3528 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3529#ifndef NDEBUG
3530 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3533 "Masked and unmasked pseudos are inconsistent");
3534 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3535 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3536#endif
3537
3539 // Skip the merge operand at index 0 if !UseTUPseudo.
3540 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3541 // Skip the mask, and the Glue.
3542 SDValue Op = N->getOperand(I);
3543 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3544 continue;
3545 Ops.push_back(Op);
3546 }
3547
3548 // Transitively apply any node glued to our new node.
3549 const auto *Glued = N->getGluedNode();
3550 if (auto *TGlued = Glued->getGluedNode())
3551 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3552
3554 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3555
3556 if (!N->memoperands_empty())
3557 CurDAG->setNodeMemRefs(Result, N->memoperands());
3558
3559 Result->setFlags(N->getFlags());
3560 ReplaceUses(N, Result);
3561
3562 return true;
3563}
3564
// Return true if N's machine opcode maps to the RVV MC opcode VMERGE_VVM.
// NOTE(review): getMachineOpcode() is called unconditionally, so callers
// presumably guarantee N is a machine node - confirm.
3565static bool IsVMerge(SDNode *N) {
3566 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3567}
3568
// Return true if N's machine opcode maps to the RVV MC opcode VMV_V_V.
// NOTE(review): getMachineOpcode() is called unconditionally, so callers
// presumably guarantee N is a machine node - confirm.
3569static bool IsVMv(SDNode *N) {
3570 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3571}
3572
3573static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3574 switch (LMUL) {
3575 case RISCVII::LMUL_F8:
3576 return RISCV::PseudoVMSET_M_B1;
3577 case RISCVII::LMUL_F4:
3578 return RISCV::PseudoVMSET_M_B2;
3579 case RISCVII::LMUL_F2:
3580 return RISCV::PseudoVMSET_M_B4;
3581 case RISCVII::LMUL_1:
3582 return RISCV::PseudoVMSET_M_B8;
3583 case RISCVII::LMUL_2:
3584 return RISCV::PseudoVMSET_M_B16;
3585 case RISCVII::LMUL_4:
3586 return RISCV::PseudoVMSET_M_B32;
3587 case RISCVII::LMUL_8:
3588 return RISCV::PseudoVMSET_M_B64;
3590 llvm_unreachable("Unexpected LMUL");
3591 }
3592 llvm_unreachable("Unknown VLMUL enum");
3593}
3594
3595// Try to fold away VMERGE_VVM instructions into their true operands:
3596//
3597// %true = PseudoVADD_VV ...
3598// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3599// ->
3600// %x = PseudoVADD_VV_MASK %false, ..., %mask
3601//
3602// We can only fold if vmerge's merge operand, vmerge's false operand and
3603// %true's merge operand (if it has one) are the same. This is because we have
3604// to consolidate them into one merge operand in the result.
3605//
3606// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3607// mask is all ones.
3608//
3609// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3610// VMERGE_VVM with an all ones mask.
3611//
3612// The resulting VL is the minimum of the two VLs.
3613//
3614// The resulting policy is the effective policy the vmerge would have had,
3615// i.e. whether or not its merge operand was implicit-def.
3616bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3617 SDValue Merge, False, True, VL, Mask, Glue;
3618 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3619 if (IsVMv(N)) {
3620 Merge = N->getOperand(0);
3621 False = N->getOperand(0);
3622 True = N->getOperand(1);
3623 VL = N->getOperand(2);
3624 // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3625 // mask later below.
3626 } else {
3627 assert(IsVMerge(N));
3628 Merge = N->getOperand(0);
3629 False = N->getOperand(1);
3630 True = N->getOperand(2);
3631 Mask = N->getOperand(3);
3632 VL = N->getOperand(4);
3633 // We always have a glue node for the mask at v0.
3634 Glue = N->getOperand(N->getNumOperands() - 1);
3635 }
3636 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3637 assert(!Glue || Glue.getValueType() == MVT::Glue);
3638
3639 // We require that either merge and false are the same, or that merge
3640 // is undefined.
3641 if (Merge != False && !isImplicitDef(Merge))
3642 return false;
3643
3644 assert(True.getResNo() == 0 &&
3645 "Expect True is the first output of an instruction.");
3646
3647 // N must be the only user of True.
3648 if (!True.hasOneUse())
3649 return false;
3650
3651 if (!True.isMachineOpcode())
3652 return false;
3653
3654 unsigned TrueOpc = True.getMachineOpcode();
3655 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3656 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3657 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3658
3659 bool IsMasked = false;
3661 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3662 if (!Info && HasTiedDest) {
3663 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3664 IsMasked = true;
3665 }
3666
3667 if (!Info)
3668 return false;
3669
3670 // When Mask is not a true mask, this transformation is illegal for some
3671 // operations whose results are affected by mask, like viota.m.
3672 if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
3673 return false;
3674
3675 // If True has a merge operand then it needs to be the same as vmerge's False,
3676 // since False will be used for the result's merge operand.
3677 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3678 // The vmerge instruction must be TU.
3679 // FIXME: This could be relaxed, but we need to handle the policy for the
3680 // resulting op correctly.
3681 if (isImplicitDef(Merge))
3682 return false;
3683 SDValue MergeOpTrue = True->getOperand(0);
3684 if (False != MergeOpTrue)
3685 return false;
3686 }
3687
3688 // If True is masked then the vmerge must have an all 1s mask, since we're
3689 // going to keep the mask from True.
3690 if (IsMasked) {
3691 assert(HasTiedDest && "Expected tied dest");
3692 // The vmerge instruction must be TU.
3693 if (isImplicitDef(Merge))
3694 return false;
3695 // FIXME: Support mask agnostic True instruction which would have an
3696 // undef merge operand.
3697 if (Mask && !usesAllOnesMask(Mask, Glue))
3698 return false;
3699 }
3700
3701 // Skip if True has side effect.
3702 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3703 return false;
3704
3705 // The last operand of a masked instruction may be glued.
3706 bool HasGlueOp = True->getGluedNode() != nullptr;
3707
3708 // The chain operand may exist either before the glued operands or in the last
3709 // position.
3710 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3711 bool HasChainOp =
3712 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3713
3714 if (HasChainOp) {
3715 // Avoid creating cycles in the DAG. We must ensure that none of the other
3716 // operands depend on True through its chain.
3717 SmallVector<const SDNode *, 4> LoopWorklist;
3719 LoopWorklist.push_back(False.getNode());
3720 if (Mask)
3721 LoopWorklist.push_back(Mask.getNode());
3722 LoopWorklist.push_back(VL.getNode());
3723 if (Glue)
3724 LoopWorklist.push_back(Glue.getNode());
3725 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3726 return false;
3727 }
3728
3729 // The vector policy operand may be present for masked intrinsics
3730 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3731 unsigned TrueVLIndex =
3732 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3733 SDValue TrueVL = True.getOperand(TrueVLIndex);
3734 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3735
3736 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3737 if (LHS == RHS)
3738 return LHS;
3739 if (isAllOnesConstant(LHS))
3740 return RHS;
3741 if (isAllOnesConstant(RHS))
3742 return LHS;
3743 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3744 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3745 if (!CLHS || !CRHS)
3746 return SDValue();
3747 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3748 };
3749
3750 // Because N and True must have the same merge operand (or True's operand is
3751 // implicit_def), the "effective" body is the minimum of their VLs.
3752 SDValue OrigVL = VL;
3753 VL = GetMinVL(TrueVL, VL);
3754 if (!VL)
3755 return false;
3756
3757 // If we end up changing the VL or mask of True, then we need to make sure it
3758 // doesn't raise any observable fp exceptions, since changing the active
3759 // elements will affect how fflags is set.
3760 if (TrueVL != VL || !IsMasked)
3761 if (mayRaiseFPException(True.getNode()) &&
3762 !True->getFlags().hasNoFPExcept())
3763 return false;
3764
3765 SDLoc DL(N);
3766
3767 // From the preconditions we checked above, we know the mask and thus glue
3768 // for the result node will be taken from True.
3769 if (IsMasked) {
3770 Mask = True->getOperand(Info->MaskOpIdx);
3771 Glue = True->getOperand(True->getNumOperands() - 1);
3772 assert(Glue.getValueType() == MVT::Glue);
3773 }
3774 // Otherwise, if the vmerge is actually a vmv.v.v it has no mask operand, so
3775 // create an all-ones mask to use.
3776 else if (IsVMv(N)) {
3777 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3778 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3779 ElementCount EC = N->getValueType(0).getVectorElementCount();
3780 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3781
3782 SDValue AllOnesMask =
3783 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3785 RISCV::V0, AllOnesMask, SDValue());
3786 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3787 Glue = MaskCopy.getValue(1);
3788 }
3789
3790 unsigned MaskedOpc = Info->MaskedPseudo;
3791#ifndef NDEBUG
3792 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3794 "Expected instructions with mask have policy operand.");
3795 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3796 MCOI::TIED_TO) == 0 &&
3797 "Expected instructions with mask have a tied dest.");
3798#endif
3799
3800 // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3801 // operand is undefined.
3802 //
3803 // However, if the VL became smaller than what the vmerge had originally, then
3804 // elements past VL that were previously in the vmerge's body will have moved
3805 // to the tail. In that case we always need to use tail undisturbed to
3806 // preserve them.
3807 bool MergeVLShrunk = VL != OrigVL;
3808 uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3810 : /*TUMU*/ 0;
3811 SDValue PolicyOp =
3812 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3813
3814
3816 Ops.push_back(False);
3817
3818 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3819 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3820 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3821 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3822
3823 Ops.push_back(Mask);
3824
3825 // For unmasked "VOp" with rounding mode operand, that is interfaces like
3826 // (..., rm, vl) or (..., rm, vl, policy).
3827 // Its masked version is (..., vm, rm, vl, policy).
3828 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3829 if (HasRoundingMode)
3830 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3831
3832 Ops.append({VL, SEW, PolicyOp});
3833
3834 // Result node should have chain operand of True.
3835 if (HasChainOp)
3836 Ops.push_back(True.getOperand(TrueChainOpIdx));
3837
3838 // Add the glue for the CopyToReg of mask->v0.
3839 Ops.push_back(Glue);
3840
3842 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3843 Result->setFlags(True->getFlags());
3844
3845 if (!cast<MachineSDNode>(True)->memoperands_empty())
3846 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3847
3848 // Replace vmerge.vvm node by Result.
3849 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3850
3851 // Replace another value of True. E.g. chain and VL.
3852 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3853 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3854
3855 return true;
3856}
3857
3858bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3859 bool MadeChange = false;
3861
3862 while (Position != CurDAG->allnodes_begin()) {
3863 SDNode *N = &*--Position;
3864 if (N->use_empty() || !N->isMachineOpcode())
3865 continue;
3866
3867 if (IsVMerge(N) || IsVMv(N))
3868 MadeChange |= performCombineVMergeAndVOps(N);
3869 }
3870 return MadeChange;
3871}
3872
3873/// If our passthru is an implicit_def, use noreg instead. This side
3874/// steps issues with MachineCSE not being able to CSE expressions with
3875/// IMPLICIT_DEF operands while preserving the semantic intent. See
3876/// pr64282 for context. Note that this transform is the last one
3877/// performed at ISEL DAG to DAG.
3878bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3879 bool MadeChange = false;
3881
3882 while (Position != CurDAG->allnodes_begin()) {
3883 SDNode *N = &*--Position;
3884 if (N->use_empty() || !N->isMachineOpcode())
3885 continue;
3886
3887 const unsigned Opc = N->getMachineOpcode();
3888 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3890 !isImplicitDef(N->getOperand(0)))
3891 continue;
3892
3894 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3895 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3896 SDValue Op = N->getOperand(I);
3897 Ops.push_back(Op);
3898 }
3899
3901 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3902 Result->setFlags(N->getFlags());
3903 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3904 ReplaceUses(N, Result);
3905 MadeChange = true;
3906 }
3907 return MadeChange;
3908}
3909
3910
3911// This pass converts a legalized DAG into a RISCV-specific DAG, ready
3912// for instruction scheduling.
3914 CodeGenOptLevel OptLevel) {
3915 return new RISCVDAGToDAGISel(TM, OptLevel);
3916}
3917
3918char RISCVDAGToDAGISel::ID = 0;
3919
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
R600 Clause Merge
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp)
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
static bool IsVMv(SDNode *N)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)
static SDValue findVSplat(SDValue N)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm)
static bool IsVMerge(SDNode *N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define PASS_NAME
Value * RHS
Value * LHS
bool isZero() const
Definition: APFloat.h:1291
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
bool isPosZero() const
Definition: APFloat.h:1306
bool isNegZero() const
Definition: APFloat.h:1307
Class for arbitrary precision integers.
Definition: APInt.h:76
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
This class is used to form a handle around another node that is persistent and is updated across invo...
static StringRef getMemConstraintName(ConstraintCode C)
Definition: InlineAsm.h:467
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by other flags.
Definition: MCInstrDesc.h:463
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided)
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void selectVLSEGFF(SDNode *Node, bool IsMasked)
bool selectFPImm(SDValue N, SDValue &Imm)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool hasAllBUsers(SDNode *Node) const
void selectVLXSEG(SDNode *Node, bool IsMasked, bool IsOrdered)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset, bool IsINX=false)
void selectVLSEG(SDNode *Node, bool IsMasked, bool IsStrided)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
unsigned getXLen() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVII::VLMUL getLMUL(MVT VT)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
const TargetLowering * TLI
MachineFunction * MF
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
bool mayRaiseFPException(SDNode *Node) const
Return whether the node may raise an FP exception.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:723
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:552
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:532
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:533
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:473
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:728
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:774
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:677
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:800
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:561
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:555
ilist< SDNode >::iterator allnodes_iterator
Definition: SelectionDAG.h:535
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
TargetInstrInfo - Interface to description of machine instruction set.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:345
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1484
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1535
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1580
static bool hasRoundModeOp(uint64_t TSFlags)
static VLMUL getLMul(uint64_t TSFlags)
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul)
unsigned encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic)
std::optional< unsigned > getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
static const MachineMemOperand::Flags MONontemporalBit1
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:269
unsigned M1(unsigned Val)
Definition: VE.h:376
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
@ Add
Sum of integers.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
#define N
This struct is a compact representation of