1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
22#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#include "RISCVGenSearchableTables.inc"
47} // namespace llvm::RISCV
48
49void RISCVDAGToDAGISel::PreprocessISelDAG() {
50 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
51
52 bool MadeChange = false;
53 while (Position != CurDAG->allnodes_begin()) {
54 SDNode *N = &*--Position;
55 if (N->use_empty())
56 continue;
57
58 SDValue Result;
59 switch (N->getOpcode()) {
60 case ISD::SPLAT_VECTOR: {
61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63 MVT VT = N->getSimpleValueType(0);
64 unsigned Opc =
65 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66 SDLoc DL(N);
67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 SDValue Src = N->getOperand(0);
69 if (VT.isInteger())
70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71 N->getOperand(0));
72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73 break;
74 }
75 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77 // load. Done after lowering and combining so that we have a chance to
78 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
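// The expansion below is roughly: store Lo to a stack slot, store Hi to
// slot+4, then splat the combined 64-bit element with a stride-0 vlse64
// (stride register x0), so every element reloads the same 8 bytes.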
79 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 MVT VT = N->getSimpleValueType(0);
81 SDValue Passthru = N->getOperand(0);
82 SDValue Lo = N->getOperand(1);
83 SDValue Hi = N->getOperand(2);
84 SDValue VL = N->getOperand(3);
85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87 "Unexpected VTs!");
89 SDLoc DL(N);
90
91 // Create temporary stack for each expanding node.
92 SDValue StackSlot =
93 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
96
97 SDValue Chain = CurDAG->getEntryNode();
98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100 SDValue OffsetSlot =
101 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103 Align(8));
104
105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108 SDValue IntID =
109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110 SDValue Ops[] = {Chain,
111 IntID,
112 Passthru,
113 StackSlot,
114 CurDAG->getRegister(RISCV::X0, MVT::i64),
115 VL};
116
117 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118 MVT::i64, MPI, Align(8),
119 MachineMemOperand::MOLoad);
120 break;
121 }
122 }
123
124 if (Result) {
125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126 LLVM_DEBUG(N->dump(CurDAG));
127 LLVM_DEBUG(dbgs() << "\nNew: ");
128 LLVM_DEBUG(Result->dump(CurDAG));
129 LLVM_DEBUG(dbgs() << "\n");
130
131 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132 MadeChange = true;
133 }
134 }
135
136 if (MadeChange)
137 CurDAG->RemoveDeadNodes();
138}
139
140void RISCVDAGToDAGISel::PostprocessISelDAG() {
141 HandleSDNode Dummy(CurDAG->getRoot());
142 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
143
144 bool MadeChange = false;
145 while (Position != CurDAG->allnodes_begin()) {
146 SDNode *N = &*--Position;
147 // Skip dead nodes and any non-machine opcodes.
148 if (N->use_empty() || !N->isMachineOpcode())
149 continue;
150
151 MadeChange |= doPeepholeSExtW(N);
152
153 // FIXME: This is here only because the VMerge transform doesn't
154 // know how to handle masked true inputs. Once that has been moved
155 // to post-ISEL, this can be deleted as well.
156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157 }
158
159 CurDAG->setRoot(Dummy.getValue());
160
161 MadeChange |= doPeepholeMergeVVMFold();
162
163 // After we're done with everything else, convert IMPLICIT_DEF
164 // passthru operands to NoRegister. This is required to workaround
165 // an optimization deficiency in MachineCSE. This really should
166 // be merged back into each of the patterns (i.e. there's no good
167 // reason not to go directly to NoReg), but is being done this way
168 // to allow easy backporting.
169 MadeChange |= doPeepholeNoRegPassThru();
170
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
173}
174
175static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (const RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202}
203
204static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
207
208 // Use a rematerializable pseudo instruction for short sequences if enabled.
209 if (Seq.size() == 2 && UsePseudoMovImm)
210 return SDValue(
211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212 CurDAG->getTargetConstant(Imm, DL, VT)),
213 0);
214
215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216 // worst an LUI+ADDIW. This will require an extra register, but avoids a
217 // constant pool.
218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219 // low and high 32 bits are the same and bit 31 and 63 are set.
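// e.g. 0x0F0F0F0F0F0F0F0F: materialize X = 0x0F0F0F0F with LUI+ADDIW, then
// emit (ADD X, (SLLI X, 32)), i.e. four instructions and one extra register
// instead of a longer single-register shift-and-add sequence.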
220 if (Seq.size() > 3) {
221 unsigned ShiftAmt, AddOpc;
222 RISCVMatInt::InstSeq SeqLo =
223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226
227 SDValue SLLI = SDValue(
228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230 0);
231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232 }
233 }
234
235 // Otherwise, use the original sequence.
236 return selectImmSeq(CurDAG, DL, VT, Seq);
237}
238
239static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
240 unsigned NF, RISCVII::VLMUL LMUL) {
241 static const unsigned M1TupleRegClassIDs[] = {
242 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
243 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
244 RISCV::VRN8M1RegClassID};
245 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
246 RISCV::VRN3M2RegClassID,
247 RISCV::VRN4M2RegClassID};
248
249 assert(Regs.size() >= 2 && Regs.size() <= 8);
250
251 unsigned RegClassID;
252 unsigned SubReg0;
253 switch (LMUL) {
254 default:
255 llvm_unreachable("Invalid LMUL.");
256 case RISCVII::VLMUL::LMUL_F8:
257 case RISCVII::VLMUL::LMUL_F4:
258 case RISCVII::VLMUL::LMUL_F2:
259 case RISCVII::VLMUL::LMUL_1:
260 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
261 "Unexpected subreg numbering");
262 SubReg0 = RISCV::sub_vrm1_0;
263 RegClassID = M1TupleRegClassIDs[NF - 2];
264 break;
265 case RISCVII::VLMUL::LMUL_2:
266 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
267 "Unexpected subreg numbering");
268 SubReg0 = RISCV::sub_vrm2_0;
269 RegClassID = M2TupleRegClassIDs[NF - 2];
270 break;
271 case RISCVII::VLMUL::LMUL_4:
272 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
273 "Unexpected subreg numbering");
274 SubReg0 = RISCV::sub_vrm4_0;
275 RegClassID = RISCV::VRN2M4RegClassID;
276 break;
277 }
278
279 SDLoc DL(Regs[0]);
280 SmallVector<SDValue, 8> Ops;
281
282 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
283
284 for (unsigned I = 0; I < Regs.size(); ++I) {
285 Ops.push_back(Regs[I]);
286 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
287 }
288 SDNode *N =
289 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
290 return SDValue(N, 0);
291}
292
293void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
294 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
295 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
296 bool IsLoad, MVT *IndexVT) {
297 SDValue Chain = Node->getOperand(0);
298 SDValue Glue;
299
300 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
301
302 if (IsStridedOrIndexed) {
303 Operands.push_back(Node->getOperand(CurOp++)); // Index.
304 if (IndexVT)
305 *IndexVT = Operands.back()->getSimpleValueType(0);
306 }
307
308 if (IsMasked) {
309 // Mask needs to be copied to V0.
310 SDValue Mask = Node->getOperand(CurOp++);
311 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
312 Glue = Chain.getValue(1);
313 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
314 }
315 SDValue VL;
316 selectVLOp(Node->getOperand(CurOp++), VL);
317 Operands.push_back(VL);
318
319 MVT XLenVT = Subtarget->getXLenVT();
320 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
321 Operands.push_back(SEWOp);
322
323 // At the IR layer, all the masked load intrinsics have policy operands,
324 // none of the others do. All have passthru operands. For our pseudos,
325 // all loads have policy operands.
326 if (IsLoad) {
327 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
328 if (IsMasked)
329 Policy = Node->getConstantOperandVal(CurOp++);
330 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
331 Operands.push_back(PolicyOp);
332 }
333
334 Operands.push_back(Chain); // Chain.
335 if (Glue)
336 Operands.push_back(Glue);
337}
338
339void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
340 bool IsStrided) {
341 SDLoc DL(Node);
342 unsigned NF = Node->getNumValues() - 1;
343 MVT VT = Node->getSimpleValueType(0);
344 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
345 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
346
347 unsigned CurOp = 2;
348 SmallVector<SDValue, 8> Operands;
349
350 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
351 Node->op_begin() + CurOp + NF);
352 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
353 Operands.push_back(Merge);
354 CurOp += NF;
355
356 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
357 Operands, /*IsLoad=*/true);
358
359 const RISCV::VLSEGPseudo *P =
360 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
361 static_cast<unsigned>(LMUL));
362 MachineSDNode *Load =
363 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
364
365 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
366 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
367
368 SDValue SuperReg = SDValue(Load, 0);
369 for (unsigned I = 0; I < NF; ++I) {
370 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
371 ReplaceUses(SDValue(Node, I),
372 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
373 }
374
375 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
376 CurDAG->RemoveDeadNode(Node);
377}
378
379void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
380 SDLoc DL(Node);
381 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
382 MVT VT = Node->getSimpleValueType(0);
383 MVT XLenVT = Subtarget->getXLenVT();
384 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
385 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
386
387 unsigned CurOp = 2;
388 SmallVector<SDValue, 8> Operands;
389
390 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
391 Node->op_begin() + CurOp + NF);
392 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
393 Operands.push_back(MaskedOff);
394 CurOp += NF;
395
396 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
397 /*IsStridedOrIndexed*/ false, Operands,
398 /*IsLoad=*/true);
399
400 const RISCV::VLSEGPseudo *P =
401 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
402 Log2SEW, static_cast<unsigned>(LMUL));
403 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
404 XLenVT, MVT::Other, Operands);
405
406 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
407 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
408
409 SDValue SuperReg = SDValue(Load, 0);
410 for (unsigned I = 0; I < NF; ++I) {
411 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
412 ReplaceUses(SDValue(Node, I),
413 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
414 }
415
416 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
417 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
418 CurDAG->RemoveDeadNode(Node);
419}
420
421void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
422 bool IsOrdered) {
423 SDLoc DL(Node);
424 unsigned NF = Node->getNumValues() - 1;
425 MVT VT = Node->getSimpleValueType(0);
426 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
427 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
428
429 unsigned CurOp = 2;
430 SmallVector<SDValue, 8> Operands;
431
432 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
433 Node->op_begin() + CurOp + NF);
434 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
435 Operands.push_back(MaskedOff);
436 CurOp += NF;
437
438 MVT IndexVT;
439 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
440 /*IsStridedOrIndexed*/ true, Operands,
441 /*IsLoad=*/true, &IndexVT);
442
444 "Element count mismatch");
445
446 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 report_fatal_error("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Load =
456 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
457
458 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
459 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
460
461 SDValue SuperReg = SDValue(Load, 0);
462 for (unsigned I = 0; I < NF; ++I) {
463 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
464 ReplaceUses(SDValue(Node, I),
465 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
466 }
467
468 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
469 CurDAG->RemoveDeadNode(Node);
470}
471
472void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
473 bool IsStrided) {
474 SDLoc DL(Node);
475 unsigned NF = Node->getNumOperands() - 4;
476 if (IsStrided)
477 NF--;
478 if (IsMasked)
479 NF--;
480 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
481 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
482 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
483 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
484 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
485
486 SmallVector<SDValue, 8> Operands;
487 Operands.push_back(StoreVal);
488 unsigned CurOp = 2 + NF;
489
490 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
491 Operands);
492
493 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
494 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
495 MachineSDNode *Store =
496 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
497
498 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
499 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
500
501 ReplaceNode(Node, Store);
502}
503
504void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
505 bool IsOrdered) {
506 SDLoc DL(Node);
507 unsigned NF = Node->getNumOperands() - 5;
508 if (IsMasked)
509 --NF;
510 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
511 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
512 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
513 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
514 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
515
516 SmallVector<SDValue, 8> Operands;
517 Operands.push_back(StoreVal);
518 unsigned CurOp = 2 + NF;
519
520 MVT IndexVT;
521 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
522 /*IsStridedOrIndexed*/ true, Operands,
523 /*IsLoad=*/false, &IndexVT);
524
526 "Element count mismatch");
527
528 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
529 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
530 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
531 report_fatal_error("The V extension does not support EEW=64 for index "
532 "values when XLEN=32");
533 }
534 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
535 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
536 static_cast<unsigned>(IndexLMUL));
537 MachineSDNode *Store =
538 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
539
540 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
541 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
542
543 ReplaceNode(Node, Store);
544}
545
546void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
547 if (!Subtarget->hasVInstructions())
548 return;
549
550 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
551
552 SDLoc DL(Node);
553 MVT XLenVT = Subtarget->getXLenVT();
554
555 unsigned IntNo = Node->getConstantOperandVal(0);
556
557 assert((IntNo == Intrinsic::riscv_vsetvli ||
558 IntNo == Intrinsic::riscv_vsetvlimax) &&
559 "Unexpected vsetvli intrinsic");
560
561 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
562 unsigned Offset = (VLMax ? 1 : 2);
563
564 assert(Node->getNumOperands() == Offset + 2 &&
565 "Unexpected number of operands");
566
567 unsigned SEW =
568 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
569 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
570 Node->getConstantOperandVal(Offset + 1) & 0x7);
571
572 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
573 /*MaskAgnostic*/ true);
574 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
575
576 SDValue VLOperand;
577 unsigned Opcode = RISCV::PseudoVSETVLI;
578 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
579 if (auto VLEN = Subtarget->getRealVLen())
580 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
581 VLMax = true;
582 }
583 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
584 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
585 Opcode = RISCV::PseudoVSETVLIX0;
586 } else {
587 VLOperand = Node->getOperand(1);
588
589 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
590 uint64_t AVL = C->getZExtValue();
591 if (isUInt<5>(AVL)) {
592 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
593 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
594 XLenVT, VLImm, VTypeIOp));
595 return;
596 }
597 }
598 }
599
600 ReplaceNode(Node,
601 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
602}
603
604bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
605 MVT VT = Node->getSimpleValueType(0);
606 unsigned Opcode = Node->getOpcode();
607 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
608 "Unexpected opcode");
609 SDLoc DL(Node);
610
611 // For operations of the form (x << C1) op C2, check if we can use
612 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
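// e.g. (or (shl X, 4), 0x4560) cannot use ORI directly (0x4560 is not a
// simm12), but the equivalent (shl (or X, 0x456), 4) can, since 0x456 fits
// in 12 bits.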
613 SDValue N0 = Node->getOperand(0);
614 SDValue N1 = Node->getOperand(1);
615
616 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
617 if (!Cst)
618 return false;
619
620 int64_t Val = Cst->getSExtValue();
621
622 // Check if immediate can already use ANDI/ORI/XORI.
623 if (isInt<12>(Val))
624 return false;
625
626 SDValue Shift = N0;
627
628 // If Val is simm32 and we have a sext_inreg from i32, then the binop
629 // produces at least 33 sign bits. We can peek through the sext_inreg and use
630 // a SLLIW at the end.
631 bool SignExt = false;
632 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
633 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
634 SignExt = true;
635 Shift = N0.getOperand(0);
636 }
637
638 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
639 return false;
640
641 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
642 if (!ShlCst)
643 return false;
644
645 uint64_t ShAmt = ShlCst->getZExtValue();
646
647 // Make sure that we don't change the operation by removing bits.
648 // This only matters for OR and XOR, AND is unaffected.
649 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
650 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
651 return false;
652
653 int64_t ShiftedVal = Val >> ShAmt;
654 if (!isInt<12>(ShiftedVal))
655 return false;
656
657 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
658 if (SignExt && ShAmt >= 32)
659 return false;
660
661 // Ok, we can reorder to get a smaller immediate.
662 unsigned BinOpc;
663 switch (Opcode) {
664 default: llvm_unreachable("Unexpected opcode");
665 case ISD::AND: BinOpc = RISCV::ANDI; break;
666 case ISD::OR: BinOpc = RISCV::ORI; break;
667 case ISD::XOR: BinOpc = RISCV::XORI; break;
668 }
669
670 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
671
672 SDNode *BinOp =
673 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
674 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
675 SDNode *SLLI =
676 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
677 CurDAG->getTargetConstant(ShAmt, DL, VT));
678 ReplaceNode(Node, SLLI);
679 return true;
680}
681
682bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
683 // Only supported with XTHeadBb at the moment.
684 if (!Subtarget->hasVendorXTHeadBb())
685 return false;
686
687 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
688 if (!N1C)
689 return false;
690
691 SDValue N0 = Node->getOperand(0);
692 if (!N0.hasOneUse())
693 return false;
694
695 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
696 MVT VT) {
697 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
698 CurDAG->getTargetConstant(Msb, DL, VT),
699 CurDAG->getTargetConstant(Lsb, DL, VT));
700 };
701
702 SDLoc DL(Node);
703 MVT VT = Node->getSimpleValueType(0);
704 const unsigned RightShAmt = N1C->getZExtValue();
705
706 // Transform (sra (shl X, C1), C2) with C1 <= C2
707 // -> (TH.EXT X, msb, lsb)
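// e.g. with XLEN=64, (sra (shl X, 8), 20) sign-extends bits [55:12] of X,
// which is exactly (TH.EXT X, 55, 12).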
708 if (N0.getOpcode() == ISD::SHL) {
709 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
710 if (!N01C)
711 return false;
712
713 const unsigned LeftShAmt = N01C->getZExtValue();
714 // Make sure that this is a bitfield extraction (i.e., the shift-right
715 // amount can not be less than the left-shift).
716 if (LeftShAmt > RightShAmt)
717 return false;
718
719 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
720 const unsigned Msb = MsbPlusOne - 1;
721 const unsigned Lsb = RightShAmt - LeftShAmt;
722
723 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724 ReplaceNode(Node, TH_EXT);
725 return true;
726 }
727
728 // Transform (sra (sext_inreg X, _), C) ->
729 // (TH.EXT X, msb, lsb)
730 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
731 unsigned ExtSize =
732 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
733
734 // ExtSize of 32 should use sraiw via tablegen pattern.
735 if (ExtSize == 32)
736 return false;
737
738 const unsigned Msb = ExtSize - 1;
739 const unsigned Lsb = RightShAmt;
740
741 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
742 ReplaceNode(Node, TH_EXT);
743 return true;
744 }
745
746 return false;
747}
748
749bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
750 // Target does not support indexed loads.
751 if (!Subtarget->hasVendorXTHeadMemIdx())
752 return false;
753
754 LoadSDNode *Ld = cast<LoadSDNode>(Node);
755 ISD::MemIndexedMode AM = Ld->getAddressingMode();
756 if (AM == ISD::UNINDEXED)
757 return false;
758
759 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
760 if (!C)
761 return false;
762
763 EVT LoadVT = Ld->getMemoryVT();
764 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
765 "Unexpected addressing mode");
766 bool IsPre = AM == ISD::PRE_INC;
767 bool IsPost = AM == ISD::POST_INC;
768 int64_t Offset = C->getSExtValue();
769
770 // The constants that can be encoded in the THeadMemIdx instructions
771 // are of the form (sign_extend(imm5) << imm2).
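// e.g. an offset of 48 is encoded as (12 << 2) and -64 as (-16 << 2), while
// an offset such as 33 cannot be encoded by any shift of 0-3.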
772 int64_t Shift;
773 for (Shift = 0; Shift < 4; Shift++)
774 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
775 break;
776
777 // Constant cannot be encoded.
778 if (Shift == 4)
779 return false;
780
781 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
782 unsigned Opcode;
783 if (LoadVT == MVT::i8 && IsPre)
784 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
785 else if (LoadVT == MVT::i8 && IsPost)
786 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
787 else if (LoadVT == MVT::i16 && IsPre)
788 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
789 else if (LoadVT == MVT::i16 && IsPost)
790 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
791 else if (LoadVT == MVT::i32 && IsPre)
792 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
793 else if (LoadVT == MVT::i32 && IsPost)
794 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
795 else if (LoadVT == MVT::i64 && IsPre)
796 Opcode = RISCV::TH_LDIB;
797 else if (LoadVT == MVT::i64 && IsPost)
798 Opcode = RISCV::TH_LDIA;
799 else
800 return false;
801
802 EVT Ty = Ld->getOffset().getValueType();
803 SDValue Ops[] = {Ld->getBasePtr(),
804 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
805 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
806 Ld->getChain()};
807 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
808 Ld->getValueType(1), MVT::Other, Ops);
809
810 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
811 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
812
813 ReplaceNode(Node, New);
814
815 return true;
816}
817
818void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
819 if (!Subtarget->hasVInstructions())
820 return;
821
822 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
823
824 SDLoc DL(Node);
825 unsigned IntNo = Node->getConstantOperandVal(1);
826
827 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
828 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
829 "Unexpected vsetvli intrinsic");
830
831 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
832 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
833 SDValue SEWOp =
834 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
835 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
836 Node->getOperand(4), Node->getOperand(5),
837 Node->getOperand(8), SEWOp,
838 Node->getOperand(0)};
839
840 unsigned Opcode;
841 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
842 switch (LMulSDNode->getSExtValue()) {
843 case 5:
844 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
845 : RISCV::PseudoVC_I_SE_MF8;
846 break;
847 case 6:
848 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
849 : RISCV::PseudoVC_I_SE_MF4;
850 break;
851 case 7:
852 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
853 : RISCV::PseudoVC_I_SE_MF2;
854 break;
855 case 0:
856 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
857 : RISCV::PseudoVC_I_SE_M1;
858 break;
859 case 1:
860 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
861 : RISCV::PseudoVC_I_SE_M2;
862 break;
863 case 2:
864 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
865 : RISCV::PseudoVC_I_SE_M4;
866 break;
867 case 3:
868 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
869 : RISCV::PseudoVC_I_SE_M8;
870 break;
871 }
872
873 ReplaceNode(Node, CurDAG->getMachineNode(
874 Opcode, DL, Node->getSimpleValueType(0), Operands));
875}
876
877void RISCVDAGToDAGISel::Select(SDNode *Node) {
878 // If we have a custom node, we have already selected.
879 if (Node->isMachineOpcode()) {
880 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
881 Node->setNodeId(-1);
882 return;
883 }
884
885 // Instruction Selection not handled by the auto-generated tablegen selection
886 // should be handled here.
887 unsigned Opcode = Node->getOpcode();
888 MVT XLenVT = Subtarget->getXLenVT();
889 SDLoc DL(Node);
890 MVT VT = Node->getSimpleValueType(0);
891
892 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
893
894 switch (Opcode) {
895 case ISD::Constant: {
896 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
897 auto *ConstNode = cast<ConstantSDNode>(Node);
898 if (ConstNode->isZero()) {
899 SDValue New =
900 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
901 ReplaceNode(Node, New.getNode());
902 return;
903 }
904 int64_t Imm = ConstNode->getSExtValue();
905 // If only the lower 8 bits are used, try to convert this to a simm6 by
906 // sign-extending bit 7. This is neutral without the C extension, and
907 // allows C.LI to be used if C is present.
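// e.g. a constant of 0xF8 whose users only read the low 8 bits can instead
// be materialized as -8, which fits in a simm6 and is eligible for c.li.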
908 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
909 Imm = SignExtend64<8>(Imm);
910 // If the upper XLen-16 bits are not used, try to convert this to a simm12
911 // by sign extending bit 15.
912 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
913 hasAllHUsers(Node))
914 Imm = SignExtend64<16>(Imm);
915 // If the upper 32-bits are not used try to convert this into a simm32 by
916 // sign extending bit 32.
917 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
918 Imm = SignExtend64<32>(Imm);
919
920 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
921 return;
922 }
923 case ISD::ConstantFP: {
924 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
925 auto [FPImm, NeedsFNeg] =
926 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
927 VT);
928 if (FPImm >= 0) {
929 unsigned Opc;
930 unsigned FNegOpc;
931 switch (VT.SimpleTy) {
932 default:
933 llvm_unreachable("Unexpected size");
934 case MVT::f16:
935 Opc = RISCV::FLI_H;
936 FNegOpc = RISCV::FSGNJN_H;
937 break;
938 case MVT::f32:
939 Opc = RISCV::FLI_S;
940 FNegOpc = RISCV::FSGNJN_S;
941 break;
942 case MVT::f64:
943 Opc = RISCV::FLI_D;
944 FNegOpc = RISCV::FSGNJN_D;
945 break;
946 }
947 SDNode *Res = CurDAG->getMachineNode(
948 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
949 if (NeedsFNeg)
950 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
951 SDValue(Res, 0));
952
953 ReplaceNode(Node, Res);
954 return;
955 }
956
957 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
958 SDValue Imm;
959 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
960 // create an integer immediate.
961 if (APF.isPosZero() || NegZeroF64)
962 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
963 else
964 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
965 *Subtarget);
966
967 bool HasZdinx = Subtarget->hasStdExtZdinx();
968 bool Is64Bit = Subtarget->is64Bit();
969 unsigned Opc;
970 switch (VT.SimpleTy) {
971 default:
972 llvm_unreachable("Unexpected size");
973 case MVT::bf16:
974 assert(Subtarget->hasStdExtZfbfmin());
975 Opc = RISCV::FMV_H_X;
976 break;
977 case MVT::f16:
978 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
979 break;
980 case MVT::f32:
981 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
982 break;
983 case MVT::f64:
984 // For RV32, we can't move from a GPR, we need to convert instead. This
985 // should only happen for +0.0 and -0.0.
986 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
987 if (Is64Bit)
988 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
989 else
990 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
991 break;
992 }
993
994 SDNode *Res;
995 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
996 Res = CurDAG->getMachineNode(
997 Opc, DL, VT, Imm,
998 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
999 else
1000 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1001
1002 // For f64 -0.0, we need to insert a fneg.d idiom.
1003 if (NegZeroF64) {
1004 Opc = RISCV::FSGNJN_D;
1005 if (HasZdinx)
1006 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1007 Res =
1008 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1009 }
1010
1011 ReplaceNode(Node, Res);
1012 return;
1013 }
1014 case RISCVISD::BuildPairF64: {
1015 if (!Subtarget->hasStdExtZdinx())
1016 break;
1017
1018 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1019
1020 SDValue Ops[] = {
1021 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1022 Node->getOperand(0),
1023 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1024 Node->getOperand(1),
1025 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1026
1027 SDNode *N =
1028 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1029 ReplaceNode(Node, N);
1030 return;
1031 }
1032 case RISCVISD::SplitF64: {
1033 if (Subtarget->hasStdExtZdinx()) {
1034 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1035
1036 if (!SDValue(Node, 0).use_empty()) {
1037 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1038 Node->getOperand(0));
1039 ReplaceUses(SDValue(Node, 0), Lo);
1040 }
1041
1042 if (!SDValue(Node, 1).use_empty()) {
1043 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1044 Node->getOperand(0));
1045 ReplaceUses(SDValue(Node, 1), Hi);
1046 }
1047
1048 CurDAG->RemoveDeadNode(Node);
1049 return;
1050 }
1051
1052 if (!Subtarget->hasStdExtZfa())
1053 break;
1054 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1055 "Unexpected subtarget");
1056
1057 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1058 if (!SDValue(Node, 0).use_empty()) {
1059 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1060 Node->getOperand(0));
1061 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1062 }
1063 if (!SDValue(Node, 1).use_empty()) {
1064 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1065 Node->getOperand(0));
1066 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1067 }
1068
1069 CurDAG->RemoveDeadNode(Node);
1070 return;
1071 }
1072 case ISD::SHL: {
1073 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1074 if (!N1C)
1075 break;
1076 SDValue N0 = Node->getOperand(0);
1077 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1078 !isa<ConstantSDNode>(N0.getOperand(1)))
1079 break;
1080 unsigned ShAmt = N1C->getZExtValue();
1081 uint64_t Mask = N0.getConstantOperandVal(1);
1082
1083 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1084 // 32 leading zeros and C3 trailing zeros.
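// e.g. (shl (and X, 0xFFFFFFF0), 2) becomes (slli (srliw X, 4), 6): the
// srliw discards both the masked-off low bits and the upper 32 bits, and
// the slli by 4 + 2 restores the alignment and applies the original shift.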
1085 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1086 unsigned XLen = Subtarget->getXLen();
1087 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1088 unsigned TrailingZeros = llvm::countr_zero(Mask);
1089 if (TrailingZeros > 0 && LeadingZeros == 32) {
1090 SDNode *SRLIW = CurDAG->getMachineNode(
1091 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1092 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1093 SDNode *SLLI = CurDAG->getMachineNode(
1094 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1095 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1096 ReplaceNode(Node, SLLI);
1097 return;
1098 }
1099 }
1100 break;
1101 }
1102 case ISD::SRL: {
1103 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1104 if (!N1C)
1105 break;
1106 SDValue N0 = Node->getOperand(0);
1107 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1108 break;
1109 unsigned ShAmt = N1C->getZExtValue();
1110 uint64_t Mask = N0.getConstantOperandVal(1);
1111
1112 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1113 // 32 leading zeros and C3 trailing zeros.
1114 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1115 unsigned XLen = Subtarget->getXLen();
1116 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1117 unsigned TrailingZeros = llvm::countr_zero(Mask);
1118 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1119 SDNode *SRLIW = CurDAG->getMachineNode(
1120 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1121 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1122 SDNode *SLLI = CurDAG->getMachineNode(
1123 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1124 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1125 ReplaceNode(Node, SLLI);
1126 return;
1127 }
1128 }
1129
1130 // Optimize (srl (and X, C2), C) ->
1131 // (srli (slli X, XLen-C3), (XLen-C3) + C)
1132 // Where C2 is a mask with C3 trailing ones.
1133 // Taking into account that the C2 may have had lower bits unset by
1134 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1135 // This pattern occurs when type legalizing right shifts for types with
1136 // less than XLen bits.
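// e.g. on RV64, (srl (and X, 0x7FF), 4) becomes (srli (slli X, 53), 57),
// isolating bits [10:4] of X without materializing the 0x7FF mask.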
1137 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1138 if (!isMask_64(Mask))
1139 break;
1140 unsigned TrailingOnes = llvm::countr_one(Mask);
1141 if (ShAmt >= TrailingOnes)
1142 break;
1143 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1144 if (TrailingOnes == 32) {
1145 SDNode *SRLI = CurDAG->getMachineNode(
1146 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1147 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1148 ReplaceNode(Node, SRLI);
1149 return;
1150 }
1151
1152 // Only do the remaining transforms if the AND has one use.
1153 if (!N0.hasOneUse())
1154 break;
1155
1156 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1157 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1158 SDNode *BEXTI = CurDAG->getMachineNode(
1159 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1160 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1161 ReplaceNode(Node, BEXTI);
1162 return;
1163 }
1164
1165 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1166 SDNode *SLLI =
1167 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1168 CurDAG->getTargetConstant(LShAmt, DL, VT));
1169 SDNode *SRLI = CurDAG->getMachineNode(
1170 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1171 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1172 ReplaceNode(Node, SRLI);
1173 return;
1174 }
1175 case ISD::SRA: {
1176 if (trySignedBitfieldExtract(Node))
1177 return;
1178
1179 // Optimize (sra (sext_inreg X, i16), C) ->
1180 // (srai (slli X, XLen-16), (XLen-16) + C)
1181 // And (sra (sext_inreg X, i8), C) ->
1182 // (srai (slli X, XLen-8), (XLen-8) + C)
1183 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1184 // This transform matches the code we get without Zbb. The shifts are more
1185 // compressible, and this can help expose CSE opportunities in the sdiv by
1186 // constant optimization.
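// e.g. on RV64, (sra (sext_inreg X, i8), 3) becomes (srai (slli X, 56), 59).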
1187 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1188 if (!N1C)
1189 break;
1190 SDValue N0 = Node->getOperand(0);
1191 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1192 break;
1193 unsigned ShAmt = N1C->getZExtValue();
1194 unsigned ExtSize =
1195 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1196 // ExtSize of 32 should use sraiw via tablegen pattern.
1197 if (ExtSize >= 32 || ShAmt >= ExtSize)
1198 break;
1199 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1200 SDNode *SLLI =
1201 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1202 CurDAG->getTargetConstant(LShAmt, DL, VT));
1203 SDNode *SRAI = CurDAG->getMachineNode(
1204 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1205 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1206 ReplaceNode(Node, SRAI);
1207 return;
1208 }
1209 case ISD::OR:
1210 case ISD::XOR:
1211 if (tryShrinkShlLogicImm(Node))
1212 return;
1213
1214 break;
1215 case ISD::AND: {
1216 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1217 if (!N1C)
1218 break;
1219 uint64_t C1 = N1C->getZExtValue();
1220 const bool isC1Mask = isMask_64(C1);
1221 const bool isC1ANDI = isInt<12>(C1);
1222
1223 SDValue N0 = Node->getOperand(0);
1224
1225 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1226 SDValue X, unsigned Msb,
1227 unsigned Lsb) {
1228 if (!Subtarget->hasVendorXTHeadBb())
1229 return false;
1230
1231 SDNode *TH_EXTU = CurDAG->getMachineNode(
1232 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1233 CurDAG->getTargetConstant(Lsb, DL, VT));
1234 ReplaceNode(Node, TH_EXTU);
1235 return true;
1236 };
1237
1238 bool LeftShift = N0.getOpcode() == ISD::SHL;
1239 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1240 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1241 if (!C)
1242 break;
1243 unsigned C2 = C->getZExtValue();
1244 unsigned XLen = Subtarget->getXLen();
1245 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1246
1247 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1248 // shift pair might offer more compression opportunities.
1249 // TODO: We could check for C extension here, but we don't have many lit
1250 // tests with the C extension enabled so not checking gets better
1251 // coverage.
1252 // TODO: What if ANDI is faster than the shift?
1253 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1254
1255 // Clear irrelevant bits in the mask.
1256 if (LeftShift)
1257 C1 &= maskTrailingZeros<uint64_t>(C2);
1258 else
1259 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1260
1261 // Some transforms should only be done if the shift has a single use or
1262 // the AND would become (srli (slli X, 32), 32)
1263 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1264
1265 SDValue X = N0.getOperand(0);
1266
1267 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1268 // with c3 leading zeros.
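// e.g. (and (srl x, 4), 0xFFFFFF) has c3 = 40 leading zeros, so (absent the
// SRLIW/th.extu special cases below) it becomes (srli (slli x, 36), 40),
// keeping bits [27:4] of x.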
1269 if (!LeftShift && isC1Mask) {
1270 unsigned Leading = XLen - llvm::bit_width(C1);
1271 if (C2 < Leading) {
1272 // If the number of leading zeros is C2+32 this can be SRLIW.
1273 if (C2 + 32 == Leading) {
1274 SDNode *SRLIW = CurDAG->getMachineNode(
1275 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1276 ReplaceNode(Node, SRLIW);
1277 return;
1278 }
1279
1280 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1281 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1282 //
1283 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1284 // legalized and goes through DAG combine.
1285 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1288 SDNode *SRAIW =
1289 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1290 CurDAG->getTargetConstant(31, DL, VT));
1291 SDNode *SRLIW = CurDAG->getMachineNode(
1292 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1293 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1294 ReplaceNode(Node, SRLIW);
1295 return;
1296 }
1297
1298 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1299 // available.
1300 // Transform (and (srl x, C2), C1)
1301 // -> (<bfextract> x, msb, lsb)
1302 //
1303 // Make sure to keep this below the SRLIW cases, as we always want to
1304 // prefer the more common instruction.
1305 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1306 const unsigned Lsb = C2;
1307 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1308 return;
1309
1310 // (srli (slli x, c3-c2), c3).
1311 // Skip if we could use (zext.w (sraiw X, C2)).
1312 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1313 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1314 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1315 // Also Skip if we can use bexti or th.tst.
1316 Skip |= HasBitTest && Leading == XLen - 1;
1317 if (OneUseOrZExtW && !Skip) {
1318 SDNode *SLLI = CurDAG->getMachineNode(
1319 RISCV::SLLI, DL, VT, X,
1320 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1321 SDNode *SRLI = CurDAG->getMachineNode(
1322 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1323 CurDAG->getTargetConstant(Leading, DL, VT));
1324 ReplaceNode(Node, SRLI);
1325 return;
1326 }
1327 }
1328 }
1329
1330 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1331 // shifted by c2 bits with c3 leading zeros.
1332 if (LeftShift && isShiftedMask_64(C1)) {
1333 unsigned Leading = XLen - llvm::bit_width(C1);
1334
1335 if (C2 + Leading < XLen &&
1336 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1337 // Use slli.uw when possible.
1338 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1339 SDNode *SLLI_UW =
1340 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1341 CurDAG->getTargetConstant(C2, DL, VT));
1342 ReplaceNode(Node, SLLI_UW);
1343 return;
1344 }
1345
1346 // (srli (slli x, c2+c3), c3)
1347 if (OneUseOrZExtW && !IsCANDI) {
1348 SDNode *SLLI = CurDAG->getMachineNode(
1349 RISCV::SLLI, DL, VT, X,
1350 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1351 SDNode *SRLI = CurDAG->getMachineNode(
1352 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1353 CurDAG->getTargetConstant(Leading, DL, VT));
1354 ReplaceNode(Node, SRLI);
1355 return;
1356 }
1357 }
1358 }
1359
1360 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1361 // shifted mask with c2 leading zeros and c3 trailing zeros.
1362 if (!LeftShift && isShiftedMask_64(C1)) {
1363 unsigned Leading = XLen - llvm::bit_width(C1);
1364 unsigned Trailing = llvm::countr_zero(C1);
1365 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1366 !IsCANDI) {
1367 unsigned SrliOpc = RISCV::SRLI;
1368 // If the input is zexti32 we should use SRLIW.
1369 if (X.getOpcode() == ISD::AND &&
1370 isa<ConstantSDNode>(X.getOperand(1)) &&
1371 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1372 SrliOpc = RISCV::SRLIW;
1373 X = X.getOperand(0);
1374 }
1375 SDNode *SRLI = CurDAG->getMachineNode(
1376 SrliOpc, DL, VT, X,
1377 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1378 SDNode *SLLI = CurDAG->getMachineNode(
1379 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1380 CurDAG->getTargetConstant(Trailing, DL, VT));
1381 ReplaceNode(Node, SLLI);
1382 return;
1383 }
1384 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1385 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1386 OneUseOrZExtW && !IsCANDI) {
1387 SDNode *SRLIW = CurDAG->getMachineNode(
1388 RISCV::SRLIW, DL, VT, X,
1389 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1390 SDNode *SLLI = CurDAG->getMachineNode(
1391 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1392 CurDAG->getTargetConstant(Trailing, DL, VT));
1393 ReplaceNode(Node, SLLI);
1394 return;
1395 }
1396 }
1397
1398 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1399 // shifted mask with no leading zeros and c3 trailing zeros.
1400 if (LeftShift && isShiftedMask_64(C1)) {
1401 unsigned Leading = XLen - llvm::bit_width(C1);
1402 unsigned Trailing = llvm::countr_zero(C1);
1403 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1404 SDNode *SRLI = CurDAG->getMachineNode(
1405 RISCV::SRLI, DL, VT, X,
1406 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1407 SDNode *SLLI = CurDAG->getMachineNode(
1408 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1409 CurDAG->getTargetConstant(Trailing, DL, VT));
1410 ReplaceNode(Node, SLLI);
1411 return;
1412 }
1413 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1414 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1415 SDNode *SRLIW = CurDAG->getMachineNode(
1416 RISCV::SRLIW, DL, VT, X,
1417 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1418 SDNode *SLLI = CurDAG->getMachineNode(
1419 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1420 CurDAG->getTargetConstant(Trailing, DL, VT));
1421 ReplaceNode(Node, SLLI);
1422 return;
1423 }
1424
1425 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1426 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1427 Subtarget->hasStdExtZba()) {
1428 SDNode *SRLI = CurDAG->getMachineNode(
1429 RISCV::SRLI, DL, VT, X,
1430 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1431 SDNode *SLLI_UW = CurDAG->getMachineNode(
1432 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1433 CurDAG->getTargetConstant(Trailing, DL, VT));
1434 ReplaceNode(Node, SLLI_UW);
1435 return;
1436 }
1437 }
1438 }
1439
1440 // If C1 masks off the upper bits only (but can't be formed as an
1441 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1442 // available.
1443 // Transform (and x, C1)
1444 // -> (<bfextract> x, msb, lsb)
1445 if (isC1Mask && !isC1ANDI) {
1446 const unsigned Msb = llvm::bit_width(C1) - 1;
1447 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1448 return;
1449 }
1450
1451 if (tryShrinkShlLogicImm(Node))
1452 return;
1453
1454 break;
1455 }
1456 case ISD::MUL: {
1457 // Special case for calculating (mul (and X, C2), C1) where the full product
1458 // fits in XLen bits. We can shift X left by the number of leading zeros in
1459 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1460 // product has XLen trailing zeros, putting it in the output of MULHU. This
1461 // can avoid materializing a constant in a register for C2.
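// e.g. with XLen=64, (mul (and X, 0xFFFF), 0x12345) can become
// (mulhu (slli X, 48), 0x12345 << 16): the two shifts add up to 64, so the
// low 64 bits of the double-width product are zero and mulhu yields exactly
// (X & 0xFFFF) * 0x12345 (which fits in 64 bits), without materializing
// 0xFFFF in a register.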
1462
1463 // RHS should be a constant.
1464 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1465 if (!N1C || !N1C->hasOneUse())
1466 break;
1467
1468 // LHS should be an AND with constant.
1469 SDValue N0 = Node->getOperand(0);
1470 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1471 break;
1472
1473 uint64_t C2 = N0.getConstantOperandVal(1);
1474
1475 // Constant should be a mask.
1476 if (!isMask_64(C2))
1477 break;
1478
1479 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1480 // multiple users or the constant is a simm12. This prevents inserting a
1481 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1482 // make it more costly to materialize. Otherwise, using a SLLI might allow
1483 // it to be compressed.
1484 bool IsANDIOrZExt =
1485 isInt<12>(C2) ||
1486 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1487 // With XTHeadBb, we can use TH.EXTU.
1488 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1489 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1490 break;
1491 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1492 // the constant is a simm32.
1493 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1494 // With XTHeadBb, we can use TH.EXTU.
1495 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1496 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1497 break;
1498
1499 // We need to shift left the AND input and C1 by a total of XLen bits.
1500
1501 // How far left do we need to shift the AND input?
1502 unsigned XLen = Subtarget->getXLen();
1503 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1504
1505 // The constant gets shifted by the remaining amount unless that would
1506 // shift bits out.
1507 uint64_t C1 = N1C->getZExtValue();
1508 unsigned ConstantShift = XLen - LeadingZeros;
1509 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1510 break;
1511
1512 uint64_t ShiftedC1 = C1 << ConstantShift;
1513 // If this is RV32, we need to sign extend the constant.
1514 if (XLen == 32)
1515 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1516
1517 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1518 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1519 SDNode *SLLI =
1520 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1521 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1522 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1523 SDValue(SLLI, 0), SDValue(Imm, 0));
1524 ReplaceNode(Node, MULHU);
1525 return;
1526 }
1527 case ISD::LOAD: {
1528 if (tryIndexedLoad(Node))
1529 return;
1530
1531 if (Subtarget->hasVendorXCVmem()) {
1532 // We match a post-incrementing load here.
1533 LoadSDNode *Load = cast<LoadSDNode>(Node);
1534 if (Load->getAddressingMode() != ISD::POST_INC)
1535 break;
1536
1537 SDValue Chain = Node->getOperand(0);
1538 SDValue Base = Node->getOperand(1);
1539 SDValue Offset = Node->getOperand(2);
1540
1541 bool Simm12 = false;
1542 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1543
1544 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1545 int ConstantVal = ConstantOffset->getSExtValue();
1546 Simm12 = isInt<12>(ConstantVal);
1547 if (Simm12)
1548 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1549 Offset.getValueType());
1550 }
1551
1552 unsigned Opcode = 0;
1553 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1554 case MVT::i8:
1555 if (Simm12 && SignExtend)
1556 Opcode = RISCV::CV_LB_ri_inc;
1557 else if (Simm12 && !SignExtend)
1558 Opcode = RISCV::CV_LBU_ri_inc;
1559 else if (!Simm12 && SignExtend)
1560 Opcode = RISCV::CV_LB_rr_inc;
1561 else
1562 Opcode = RISCV::CV_LBU_rr_inc;
1563 break;
1564 case MVT::i16:
1565 if (Simm12 && SignExtend)
1566 Opcode = RISCV::CV_LH_ri_inc;
1567 else if (Simm12 && !SignExtend)
1568 Opcode = RISCV::CV_LHU_ri_inc;
1569 else if (!Simm12 && SignExtend)
1570 Opcode = RISCV::CV_LH_rr_inc;
1571 else
1572 Opcode = RISCV::CV_LHU_rr_inc;
1573 break;
1574 case MVT::i32:
1575 if (Simm12)
1576 Opcode = RISCV::CV_LW_ri_inc;
1577 else
1578 Opcode = RISCV::CV_LW_rr_inc;
1579 break;
1580 default:
1581 break;
1582 }
1583 if (!Opcode)
1584 break;
1585
1586 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1587 Chain.getSimpleValueType(), Base,
1588 Offset, Chain));
1589 return;
1590 }
1591 break;
1592 }
1593 case ISD::INTRINSIC_WO_CHAIN: {
1594 unsigned IntNo = Node->getConstantOperandVal(0);
1595 switch (IntNo) {
1596 // By default we do not custom select any intrinsic.
1597 default:
1598 break;
1599 case Intrinsic::riscv_vmsgeu:
1600 case Intrinsic::riscv_vmsge: {
1601 SDValue Src1 = Node->getOperand(1);
1602 SDValue Src2 = Node->getOperand(2);
1603 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1604 bool IsCmpUnsignedZero = false;
1605 // Only custom select scalar second operand.
1606 if (Src2.getValueType() != XLenVT)
1607 break;
1608 // Small constants are handled with patterns.
1609 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1610 int64_t CVal = C->getSExtValue();
1611 if (CVal >= -15 && CVal <= 16) {
1612 if (!IsUnsigned || CVal != 0)
1613 break;
1614 IsCmpUnsignedZero = true;
1615 }
1616 }
1617 MVT Src1VT = Src1.getSimpleValueType();
1618 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1619 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1620 default:
1621 llvm_unreachable("Unexpected LMUL!");
1622#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1623 case RISCVII::VLMUL::lmulenum: \
1624 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1625 : RISCV::PseudoVMSLT_VX_##suffix; \
1626 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1627 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1628 break;
1629 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1630 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1631 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1632 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1633 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1634 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1635 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1636#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1637 }
1638 SDValue SEW = CurDAG->getTargetConstant(
1639 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1640 SDValue VL;
1641 selectVLOp(Node->getOperand(3), VL);
1642
1643 // If vmsgeu with 0 immediate, expand it to vmset.
1644 if (IsCmpUnsignedZero) {
1645 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1646 return;
1647 }
1648
1649 // Expand to
1650 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1651 SDValue Cmp = SDValue(
1652 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1653 0);
1654 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1655 {Cmp, Cmp, VL, SEW}));
1656 return;
1657 }
1658 case Intrinsic::riscv_vmsgeu_mask:
1659 case Intrinsic::riscv_vmsge_mask: {
1660 SDValue Src1 = Node->getOperand(2);
1661 SDValue Src2 = Node->getOperand(3);
1662 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1663 bool IsCmpUnsignedZero = false;
1664 // Only custom select scalar second operand.
1665 if (Src2.getValueType() != XLenVT)
1666 break;
1667 // Small constants are handled with patterns.
1668 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1669 int64_t CVal = C->getSExtValue();
1670 if (CVal >= -15 && CVal <= 16) {
1671 if (!IsUnsigned || CVal != 0)
1672 break;
1673 IsCmpUnsignedZero = true;
1674 }
1675 }
1676 MVT Src1VT = Src1.getSimpleValueType();
1677 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1678 VMOROpcode;
1679 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1680 default:
1681 llvm_unreachable("Unexpected LMUL!");
1682#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1683 case RISCVII::VLMUL::lmulenum: \
1684 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1685 : RISCV::PseudoVMSLT_VX_##suffix; \
1686 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1687 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1688 break;
1689 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1690 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1691 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1692 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1693 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1694 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1695 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1696#undef CASE_VMSLT_OPCODES
1697 }
1698 // Mask operations use the LMUL from the mask type.
1699 switch (RISCVTargetLowering::getLMUL(VT)) {
1700 default:
1701 llvm_unreachable("Unexpected LMUL!");
1702#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1703 case RISCVII::VLMUL::lmulenum: \
1704 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1705 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1706 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1707 break;
1708 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1709 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1710 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1711 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1712 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1713 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1714 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1715#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1716 }
1717 SDValue SEW = CurDAG->getTargetConstant(
1718 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1719 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1720 SDValue VL;
1721 selectVLOp(Node->getOperand(5), VL);
1722 SDValue MaskedOff = Node->getOperand(1);
1723 SDValue Mask = Node->getOperand(4);
1724
1725 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1726 if (IsCmpUnsignedZero) {
1727 // We don't need vmor if the MaskedOff and the Mask are the same
1728 // value.
1729 if (Mask == MaskedOff) {
1730 ReplaceUses(Node, Mask.getNode());
1731 return;
1732 }
1733 ReplaceNode(Node,
1734 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1735 {Mask, MaskedOff, VL, MaskSEW}));
1736 return;
1737 }
1738
1739 // If the MaskedOff value and the Mask are the same value use
1740 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1741 // This avoids needing to copy v0 to vd before starting the next sequence.
1742 if (Mask == MaskedOff) {
1743 SDValue Cmp = SDValue(
1744 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1745 0);
1746 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1747 {Mask, Cmp, VL, MaskSEW}));
1748 return;
1749 }
1750
1751 // Mask needs to be copied to V0.
1752 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1753 RISCV::V0, Mask, SDValue());
1754 SDValue Glue = Chain.getValue(1);
1755 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1756
1757 // Otherwise use
1758 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1759 // The result is mask undisturbed.
1760 // We use the same instructions to emulate mask agnostic behavior, because
1761 // the agnostic result can be either undisturbed or all 1.
1762 SDValue Cmp = SDValue(
1763 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1764 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1765 0);
1766 // vmxor.mm vd, vd, v0 is used to update active value.
1767 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1768 {Cmp, Mask, VL, MaskSEW}));
1769 return;
1770 }
1771 case Intrinsic::riscv_vsetvli:
1772 case Intrinsic::riscv_vsetvlimax:
1773 return selectVSETVLI(Node);
1774 }
1775 break;
1776 }
1777 case ISD::INTRINSIC_W_CHAIN: {
1778 unsigned IntNo = Node->getConstantOperandVal(1);
1779 switch (IntNo) {
1780 // By default we do not custom select any intrinsic.
1781 default:
1782 break;
1783 case Intrinsic::riscv_vlseg2:
1784 case Intrinsic::riscv_vlseg3:
1785 case Intrinsic::riscv_vlseg4:
1786 case Intrinsic::riscv_vlseg5:
1787 case Intrinsic::riscv_vlseg6:
1788 case Intrinsic::riscv_vlseg7:
1789 case Intrinsic::riscv_vlseg8: {
1790 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1791 return;
1792 }
1793 case Intrinsic::riscv_vlseg2_mask:
1794 case Intrinsic::riscv_vlseg3_mask:
1795 case Intrinsic::riscv_vlseg4_mask:
1796 case Intrinsic::riscv_vlseg5_mask:
1797 case Intrinsic::riscv_vlseg6_mask:
1798 case Intrinsic::riscv_vlseg7_mask:
1799 case Intrinsic::riscv_vlseg8_mask: {
1800 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1801 return;
1802 }
1803 case Intrinsic::riscv_vlsseg2:
1804 case Intrinsic::riscv_vlsseg3:
1805 case Intrinsic::riscv_vlsseg4:
1806 case Intrinsic::riscv_vlsseg5:
1807 case Intrinsic::riscv_vlsseg6:
1808 case Intrinsic::riscv_vlsseg7:
1809 case Intrinsic::riscv_vlsseg8: {
1810 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1811 return;
1812 }
1813 case Intrinsic::riscv_vlsseg2_mask:
1814 case Intrinsic::riscv_vlsseg3_mask:
1815 case Intrinsic::riscv_vlsseg4_mask:
1816 case Intrinsic::riscv_vlsseg5_mask:
1817 case Intrinsic::riscv_vlsseg6_mask:
1818 case Intrinsic::riscv_vlsseg7_mask:
1819 case Intrinsic::riscv_vlsseg8_mask: {
1820 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1821 return;
1822 }
1823 case Intrinsic::riscv_vloxseg2:
1824 case Intrinsic::riscv_vloxseg3:
1825 case Intrinsic::riscv_vloxseg4:
1826 case Intrinsic::riscv_vloxseg5:
1827 case Intrinsic::riscv_vloxseg6:
1828 case Intrinsic::riscv_vloxseg7:
1829 case Intrinsic::riscv_vloxseg8:
1830 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1831 return;
1832 case Intrinsic::riscv_vluxseg2:
1833 case Intrinsic::riscv_vluxseg3:
1834 case Intrinsic::riscv_vluxseg4:
1835 case Intrinsic::riscv_vluxseg5:
1836 case Intrinsic::riscv_vluxseg6:
1837 case Intrinsic::riscv_vluxseg7:
1838 case Intrinsic::riscv_vluxseg8:
1839 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1840 return;
1841 case Intrinsic::riscv_vloxseg2_mask:
1842 case Intrinsic::riscv_vloxseg3_mask:
1843 case Intrinsic::riscv_vloxseg4_mask:
1844 case Intrinsic::riscv_vloxseg5_mask:
1845 case Intrinsic::riscv_vloxseg6_mask:
1846 case Intrinsic::riscv_vloxseg7_mask:
1847 case Intrinsic::riscv_vloxseg8_mask:
1848 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1849 return;
1850 case Intrinsic::riscv_vluxseg2_mask:
1851 case Intrinsic::riscv_vluxseg3_mask:
1852 case Intrinsic::riscv_vluxseg4_mask:
1853 case Intrinsic::riscv_vluxseg5_mask:
1854 case Intrinsic::riscv_vluxseg6_mask:
1855 case Intrinsic::riscv_vluxseg7_mask:
1856 case Intrinsic::riscv_vluxseg8_mask:
1857 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1858 return;
1859 case Intrinsic::riscv_vlseg8ff:
1860 case Intrinsic::riscv_vlseg7ff:
1861 case Intrinsic::riscv_vlseg6ff:
1862 case Intrinsic::riscv_vlseg5ff:
1863 case Intrinsic::riscv_vlseg4ff:
1864 case Intrinsic::riscv_vlseg3ff:
1865 case Intrinsic::riscv_vlseg2ff: {
1866 selectVLSEGFF(Node, /*IsMasked*/ false);
1867 return;
1868 }
1869 case Intrinsic::riscv_vlseg8ff_mask:
1870 case Intrinsic::riscv_vlseg7ff_mask:
1871 case Intrinsic::riscv_vlseg6ff_mask:
1872 case Intrinsic::riscv_vlseg5ff_mask:
1873 case Intrinsic::riscv_vlseg4ff_mask:
1874 case Intrinsic::riscv_vlseg3ff_mask:
1875 case Intrinsic::riscv_vlseg2ff_mask: {
1876 selectVLSEGFF(Node, /*IsMasked*/ true);
1877 return;
1878 }
1879 case Intrinsic::riscv_vloxei:
1880 case Intrinsic::riscv_vloxei_mask:
1881 case Intrinsic::riscv_vluxei:
1882 case Intrinsic::riscv_vluxei_mask: {
1883 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1884 IntNo == Intrinsic::riscv_vluxei_mask;
1885 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1886 IntNo == Intrinsic::riscv_vloxei_mask;
1887
1888 MVT VT = Node->getSimpleValueType(0);
1889 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1890
1891 unsigned CurOp = 2;
1892 SmallVector<SDValue, 8> Operands;
1893 Operands.push_back(Node->getOperand(CurOp++));
1894
1895 MVT IndexVT;
1896 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1897 /*IsStridedOrIndexed*/ true, Operands,
1898 /*IsLoad=*/true, &IndexVT);
1899
1901 "Element count mismatch");
1902
1903 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1904 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1905 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1906 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1907 report_fatal_error("The V extension does not support EEW=64 for index "
1908 "values when XLEN=32");
1909 }
1910 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1911 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1912 static_cast<unsigned>(IndexLMUL));
1913 MachineSDNode *Load =
1914 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1915
1916 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1917 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1918
1919 ReplaceNode(Node, Load);
1920 return;
1921 }
1922 case Intrinsic::riscv_vlm:
1923 case Intrinsic::riscv_vle:
1924 case Intrinsic::riscv_vle_mask:
1925 case Intrinsic::riscv_vlse:
1926 case Intrinsic::riscv_vlse_mask: {
1927 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1928 IntNo == Intrinsic::riscv_vlse_mask;
1929 bool IsStrided =
1930 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1931
1932 MVT VT = Node->getSimpleValueType(0);
1933 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1934
1935 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1936 // operand at the IR level. In pseudos, it has both a policy and a
1937 // passthru operand. The passthru operand is needed to track the
1938 // "tail undefined" state, and the policy is there just for
1939 // consistency - it will always be "don't care" for the
1940 // unmasked form.
1941 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1942 unsigned CurOp = 2;
1943 SmallVector<SDValue, 8> Operands;
1944 if (HasPassthruOperand)
1945 Operands.push_back(Node->getOperand(CurOp++));
1946 else {
1947 // We eagerly lower to implicit_def (instead of undef), as we
1948 // otherwise fail to select nodes such as: nxv1i1 = undef
1949 SDNode *Passthru =
1950 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1951 Operands.push_back(SDValue(Passthru, 0));
1952 }
1953 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1954 Operands, /*IsLoad=*/true);
1955
1956 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1957 const RISCV::VLEPseudo *P =
1958 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1959 static_cast<unsigned>(LMUL));
1960 MachineSDNode *Load =
1961 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1962
1963 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1964 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1965
1966 ReplaceNode(Node, Load);
1967 return;
1968 }
1969 case Intrinsic::riscv_vleff:
1970 case Intrinsic::riscv_vleff_mask: {
1971 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1972
1973 MVT VT = Node->getSimpleValueType(0);
1974 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1975
1976 unsigned CurOp = 2;
1977 SmallVector<SDValue, 7> Operands;
1978 Operands.push_back(Node->getOperand(CurOp++));
1979 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1980 /*IsStridedOrIndexed*/ false, Operands,
1981 /*IsLoad=*/true);
1982
1983 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1984 const RISCV::VLEPseudo *P =
1985 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1986 Log2SEW, static_cast<unsigned>(LMUL));
1987 MachineSDNode *Load = CurDAG->getMachineNode(
1988 P->Pseudo, DL, Node->getVTList(), Operands);
1989 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1990 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1991
1992 ReplaceNode(Node, Load);
1993 return;
1994 }
1995 }
1996 break;
1997 }
1998 case ISD::INTRINSIC_VOID: {
1999 unsigned IntNo = Node->getConstantOperandVal(1);
2000 switch (IntNo) {
2001 case Intrinsic::riscv_vsseg2:
2002 case Intrinsic::riscv_vsseg3:
2003 case Intrinsic::riscv_vsseg4:
2004 case Intrinsic::riscv_vsseg5:
2005 case Intrinsic::riscv_vsseg6:
2006 case Intrinsic::riscv_vsseg7:
2007 case Intrinsic::riscv_vsseg8: {
2008 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
2009 return;
2010 }
2011 case Intrinsic::riscv_vsseg2_mask:
2012 case Intrinsic::riscv_vsseg3_mask:
2013 case Intrinsic::riscv_vsseg4_mask:
2014 case Intrinsic::riscv_vsseg5_mask:
2015 case Intrinsic::riscv_vsseg6_mask:
2016 case Intrinsic::riscv_vsseg7_mask:
2017 case Intrinsic::riscv_vsseg8_mask: {
2018 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
2019 return;
2020 }
2021 case Intrinsic::riscv_vssseg2:
2022 case Intrinsic::riscv_vssseg3:
2023 case Intrinsic::riscv_vssseg4:
2024 case Intrinsic::riscv_vssseg5:
2025 case Intrinsic::riscv_vssseg6:
2026 case Intrinsic::riscv_vssseg7:
2027 case Intrinsic::riscv_vssseg8: {
2028 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
2029 return;
2030 }
2031 case Intrinsic::riscv_vssseg2_mask:
2032 case Intrinsic::riscv_vssseg3_mask:
2033 case Intrinsic::riscv_vssseg4_mask:
2034 case Intrinsic::riscv_vssseg5_mask:
2035 case Intrinsic::riscv_vssseg6_mask:
2036 case Intrinsic::riscv_vssseg7_mask:
2037 case Intrinsic::riscv_vssseg8_mask: {
2038 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
2039 return;
2040 }
2041 case Intrinsic::riscv_vsoxseg2:
2042 case Intrinsic::riscv_vsoxseg3:
2043 case Intrinsic::riscv_vsoxseg4:
2044 case Intrinsic::riscv_vsoxseg5:
2045 case Intrinsic::riscv_vsoxseg6:
2046 case Intrinsic::riscv_vsoxseg7:
2047 case Intrinsic::riscv_vsoxseg8:
2048 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
2049 return;
2050 case Intrinsic::riscv_vsuxseg2:
2051 case Intrinsic::riscv_vsuxseg3:
2052 case Intrinsic::riscv_vsuxseg4:
2053 case Intrinsic::riscv_vsuxseg5:
2054 case Intrinsic::riscv_vsuxseg6:
2055 case Intrinsic::riscv_vsuxseg7:
2056 case Intrinsic::riscv_vsuxseg8:
2057 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
2058 return;
2059 case Intrinsic::riscv_vsoxseg2_mask:
2060 case Intrinsic::riscv_vsoxseg3_mask:
2061 case Intrinsic::riscv_vsoxseg4_mask:
2062 case Intrinsic::riscv_vsoxseg5_mask:
2063 case Intrinsic::riscv_vsoxseg6_mask:
2064 case Intrinsic::riscv_vsoxseg7_mask:
2065 case Intrinsic::riscv_vsoxseg8_mask:
2066 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
2067 return;
2068 case Intrinsic::riscv_vsuxseg2_mask:
2069 case Intrinsic::riscv_vsuxseg3_mask:
2070 case Intrinsic::riscv_vsuxseg4_mask:
2071 case Intrinsic::riscv_vsuxseg5_mask:
2072 case Intrinsic::riscv_vsuxseg6_mask:
2073 case Intrinsic::riscv_vsuxseg7_mask:
2074 case Intrinsic::riscv_vsuxseg8_mask:
2075 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
2076 return;
2077 case Intrinsic::riscv_vsoxei:
2078 case Intrinsic::riscv_vsoxei_mask:
2079 case Intrinsic::riscv_vsuxei:
2080 case Intrinsic::riscv_vsuxei_mask: {
2081 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2082 IntNo == Intrinsic::riscv_vsuxei_mask;
2083 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2084 IntNo == Intrinsic::riscv_vsoxei_mask;
2085
2086 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2087 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2088
2089 unsigned CurOp = 2;
2090 SmallVector<SDValue, 8> Operands;
2091 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2092
2093 MVT IndexVT;
2094 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2095 /*IsStridedOrIndexed*/ true, Operands,
2096 /*IsLoad=*/false, &IndexVT);
2097
2099 "Element count mismatch");
2100
2101 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2102 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2103 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2104 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2105 report_fatal_error("The V extension does not support EEW=64 for index "
2106 "values when XLEN=32");
2107 }
2108 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2109 IsMasked, IsOrdered, IndexLog2EEW,
2110 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2111 MachineSDNode *Store =
2112 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2113
2114 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2115 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2116
2117 ReplaceNode(Node, Store);
2118 return;
2119 }
2120 case Intrinsic::riscv_vsm:
2121 case Intrinsic::riscv_vse:
2122 case Intrinsic::riscv_vse_mask:
2123 case Intrinsic::riscv_vsse:
2124 case Intrinsic::riscv_vsse_mask: {
2125 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2126 IntNo == Intrinsic::riscv_vsse_mask;
2127 bool IsStrided =
2128 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2129
2130 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2131 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2132
2133 unsigned CurOp = 2;
2134 SmallVector<SDValue, 8> Operands;
2135 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2136
2137 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2138 Operands);
2139
2140 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2141 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2142 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2143 MachineSDNode *Store =
2144 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2145 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2146 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2147
2148 ReplaceNode(Node, Store);
2149 return;
2150 }
2151 case Intrinsic::riscv_sf_vc_x_se:
2152 case Intrinsic::riscv_sf_vc_i_se:
2153 selectSF_VC_X_SE(Node);
2154 return;
2155 }
2156 break;
2157 }
2158 case ISD::BITCAST: {
2159 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2160 // Just drop bitcasts between vectors if both are fixed or both are
2161 // scalable.
2162 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2163 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2164 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2165 CurDAG->RemoveDeadNode(Node);
2166 return;
2167 }
2168 break;
2169 }
2170 case ISD::INSERT_SUBVECTOR: {
2171 SDValue V = Node->getOperand(0);
2172 SDValue SubV = Node->getOperand(1);
2173 SDLoc DL(SubV);
2174 auto Idx = Node->getConstantOperandVal(2);
2175 MVT SubVecVT = SubV.getSimpleValueType();
2176
2177 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2178 MVT SubVecContainerVT = SubVecVT;
2179 // Establish the correct scalable-vector types for any fixed-length type.
2180 if (SubVecVT.isFixedLengthVector()) {
2181 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2182 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2183 [[maybe_unused]] bool ExactlyVecRegSized =
2184 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2185 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2186 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2187 .getKnownMinValue()));
2188 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2189 }
2190 MVT ContainerVT = VT;
2191 if (VT.isFixedLengthVector())
2192 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2193
2194 const auto *TRI = Subtarget->getRegisterInfo();
2195 unsigned SubRegIdx;
2196 std::tie(SubRegIdx, Idx) =
2197 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2198 ContainerVT, SubVecContainerVT, Idx, TRI);
2199
2200 // If the Idx hasn't been completely eliminated then this is a subvector
2201 // insert which doesn't naturally align to a vector register. These must
2202 // be handled using instructions to manipulate the vector registers.
2203 if (Idx != 0)
2204 break;
2205
2206 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2207 [[maybe_unused]] bool IsSubVecPartReg =
2208 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2209 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2210 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2211 assert((!IsSubVecPartReg || V.isUndef()) &&
2212 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2213 "the subvector is smaller than a full-sized register");
2214
2215 // If we haven't set a SubRegIdx, then we must be going between
2216 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2217 if (SubRegIdx == RISCV::NoSubRegister) {
2218 unsigned InRegClassID =
2219 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2220 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2221 InRegClassID &&
2222 "Unexpected subvector extraction");
2223 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2224 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2225 DL, VT, SubV, RC);
2226 ReplaceNode(Node, NewNode);
2227 return;
2228 }
2229
2230 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2231 ReplaceNode(Node, Insert.getNode());
2232 return;
2233 }
2234 case ISD::EXTRACT_SUBVECTOR: {
2235 SDValue V = Node->getOperand(0);
2236 auto Idx = Node->getConstantOperandVal(1);
2237 MVT InVT = V.getSimpleValueType();
2238 SDLoc DL(V);
2239
2240 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2241 MVT SubVecContainerVT = VT;
2242 // Establish the correct scalable-vector types for any fixed-length type.
2243 if (VT.isFixedLengthVector()) {
2244 assert(Idx == 0);
2245 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2246 }
2247 if (InVT.isFixedLengthVector())
2248 InVT = TLI.getContainerForFixedLengthVector(InVT);
2249
2250 const auto *TRI = Subtarget->getRegisterInfo();
2251 unsigned SubRegIdx;
2252 std::tie(SubRegIdx, Idx) =
2253 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2254 InVT, SubVecContainerVT, Idx, TRI);
2255
2256 // If the Idx hasn't been completely eliminated then this is a subvector
2257 // extract which doesn't naturally align to a vector register. These must
2258 // be handled using instructions to manipulate the vector registers.
2259 if (Idx != 0)
2260 break;
2261
2262 // If we haven't set a SubRegIdx, then we must be going between
2263 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2264 if (SubRegIdx == RISCV::NoSubRegister) {
2265 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2266 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2267 InRegClassID &&
2268 "Unexpected subvector extraction");
2269 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2270 SDNode *NewNode =
2271 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2272 ReplaceNode(Node, NewNode);
2273 return;
2274 }
2275
2276 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2277 ReplaceNode(Node, Extract.getNode());
2278 return;
2279 }
2280 case RISCVISD::VMV_S_X_VL:
2281 case RISCVISD::VFMV_S_F_VL:
2282 case RISCVISD::VMV_V_X_VL:
2283 case RISCVISD::VFMV_V_F_VL: {
2284 // Try to match splat of a scalar load to a strided load with stride of x0.
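 // For example, a splat of (load p) can be selected as a zero-stride vlse
 // (stride register x0), so every element rereads the same scalar address.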
2285 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2286 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2287 if (!Node->getOperand(0).isUndef())
2288 break;
2289 SDValue Src = Node->getOperand(1);
2290 auto *Ld = dyn_cast<LoadSDNode>(Src);
2291 // Can't fold an indexed load: its second (address update) output is
2292 // also used, so the load node can't simply be removed.
2293 if (!Ld || Ld->isIndexed())
2294 break;
2295 EVT MemVT = Ld->getMemoryVT();
2296 // The memory VT should be the same size as the element type.
2297 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2298 break;
2299 if (!IsProfitableToFold(Src, Node, Node) ||
2300 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2301 break;
2302
2303 SDValue VL;
2304 if (IsScalarMove) {
2305 // We could deal with more VL if we update the VSETVLI insert pass to
2306 // avoid introducing more VSETVLI.
2307 if (!isOneConstant(Node->getOperand(2)))
2308 break;
2309 selectVLOp(Node->getOperand(2), VL);
2310 } else
2311 selectVLOp(Node->getOperand(2), VL);
2312
2313 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2314 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2315
2316 // If VL=1, then we don't need to do a strided load and can just do a
2317 // regular load.
2318 bool IsStrided = !isOneConstant(VL);
2319
2320 // Only do a strided load if we have optimized zero-stride vector load.
2321 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2322 break;
2323
2324 SmallVector<SDValue> Operands = {
2325 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2326 Ld->getBasePtr()};
2327 if (IsStrided)
2328 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2330 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2331 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2332
2333 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2334 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2335 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2336 Log2SEW, static_cast<unsigned>(LMUL));
2337 MachineSDNode *Load =
2338 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2339 // Update the chain.
2340 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2341 // Record the mem-refs
2342 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2343 // Replace the splat with the vlse.
2344 ReplaceNode(Node, Load);
2345 return;
2346 }
2347 case ISD::PREFETCH:
2348 unsigned Locality = Node->getConstantOperandVal(3);
2349 if (Locality > 2)
2350 break;
2351
2352 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2353 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2354 MMO->setFlags(MachineMemOperand::MONontemporal);
2355 
2356 int NontemporalLevel = 0;
2357 switch (Locality) {
2358 case 0:
2359 NontemporalLevel = 3; // NTL.ALL
2360 break;
2361 case 1:
2362 NontemporalLevel = 1; // NTL.PALL
2363 break;
2364 case 2:
2365 NontemporalLevel = 0; // NTL.P1
2366 break;
2367 default:
2368 llvm_unreachable("unexpected locality value.");
2369 }
2370
2371 if (NontemporalLevel & 0b1)
2372 MMO->setFlags(MONontemporalBit0);
2373 if (NontemporalLevel & 0b10)
2374 MMO->setFlags(MONontemporalBit1);
2375 }
2376 break;
2377 }
2378
2379 // Select the default instruction.
2380 SelectCode(Node);
2381}
2382
2383bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2384 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2385 std::vector<SDValue> &OutOps) {
2386 // Always produce a register and immediate operand, as expected by
2387 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2388 switch (ConstraintID) {
2389 case InlineAsm::ConstraintCode::o:
2390 case InlineAsm::ConstraintCode::m: {
2391 SDValue Op0, Op1;
2392 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2393 assert(Found && "SelectAddrRegImm should always succeed");
2394 OutOps.push_back(Op0);
2395 OutOps.push_back(Op1);
2396 return false;
2397 }
2398 case InlineAsm::ConstraintCode::A:
2399 OutOps.push_back(Op);
2400 OutOps.push_back(
2401 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2402 return false;
2403 default:
2404 report_fatal_error("Unexpected asm memory constraint " +
2405 InlineAsm::getMemConstraintName(ConstraintID));
2406 }
2407
2408 return true;
2409}
2410
2411bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2412 SDValue &Offset) {
2413 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2414 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2415 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2416 return true;
2417 }
2418
2419 return false;
2420}
2421
2422// Select a frame index and an optional immediate offset from an ADD or OR.
2423bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2424 SDValue &Offset) {
2425 if (SelectAddrFrameIndex(Addr, Base, Offset))
2426 return true;
2427 
2428 if (!CurDAG->isBaseWithConstantOffset(Addr))
2429 return false;
2430 
2431 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2432 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2433 if (isInt<12>(CVal)) {
2434 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2435 Subtarget->getXLenVT());
2436 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2437 Subtarget->getXLenVT());
2438 return true;
2439 }
2440 }
2441
2442 return false;
2443}
2444
2445// Fold constant addresses.
2446static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2447 const MVT VT, const RISCVSubtarget *Subtarget,
2448 SDValue Addr, SDValue &Base, SDValue &Offset,
2449 bool IsPrefetch = false) {
2450 if (!isa<ConstantSDNode>(Addr))
2451 return false;
2452
2453 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2454
2455 // If the constant is a simm12, we can fold the whole constant and use X0 as
2456 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2457 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
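 // For example, CVal = 0x12345678 splits into Lo12 = 0x678 and
 // Hi = 0x12345000; Hi fits in 32 bits, so the base becomes LUI 0x12345.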
2458 int64_t Lo12 = SignExtend64<12>(CVal);
2459 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2460 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2461 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2462 return false;
2463
2464 if (Hi) {
2465 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2466 Base = SDValue(
2467 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2468 CurDAG->getTargetConstant(Hi20, DL, VT)),
2469 0);
2470 } else {
2471 Base = CurDAG->getRegister(RISCV::X0, VT);
2472 }
2473 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2474 return true;
2475 }
2476
2477 // Ask how constant materialization would handle this constant.
2478 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2479
2480 // If the last instruction would be an ADDI, we can fold its immediate and
2481 // emit the rest of the sequence as the base.
2482 if (Seq.back().getOpcode() != RISCV::ADDI)
2483 return false;
2484 Lo12 = Seq.back().getImm();
2485 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2486 return false;
2487
2488 // Drop the last instruction.
2489 Seq.pop_back();
2490 assert(!Seq.empty() && "Expected more instructions in sequence");
2491
2492 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2493 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2494 return true;
2495}
2496
2497// Is this ADD instruction only used as the base pointer of scalar loads and
2498// stores?
2499static bool isWorthFoldingAdd(SDValue Add) {
2500 for (auto *Use : Add->uses()) {
2501 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2502 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2503 Use->getOpcode() != ISD::ATOMIC_STORE)
2504 return false;
2505 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2506 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2507 VT != MVT::f64)
2508 return false;
2509 // Don't allow stores of the value. It must be used as the address.
2510 if (Use->getOpcode() == ISD::STORE &&
2511 cast<StoreSDNode>(Use)->getValue() == Add)
2512 return false;
2513 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2514 cast<AtomicSDNode>(Use)->getVal() == Add)
2515 return false;
2516 }
2517
2518 return true;
2519}
2520
2521bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2522 unsigned MaxShiftAmount,
2523 SDValue &Base, SDValue &Index,
2524 SDValue &Scale) {
2525 EVT VT = Addr.getSimpleValueType();
2526 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2527 SDValue &Shift) {
2528 uint64_t ShiftAmt = 0;
2529 Index = N;
2530
2531 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2532 // Only match shifts by a value in range [0, MaxShiftAmount].
2533 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2534 Index = N.getOperand(0);
2535 ShiftAmt = N.getConstantOperandVal(1);
2536 }
2537 }
2538
2539 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2540 return ShiftAmt != 0;
2541 };
2542
2543 if (Addr.getOpcode() == ISD::ADD) {
2544 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2545 SDValue AddrB = Addr.getOperand(0);
2546 if (AddrB.getOpcode() == ISD::ADD &&
2547 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2548 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2549 isInt<12>(C1->getSExtValue())) {
2550 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2551 SDValue C1Val =
2552 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2553 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2554 AddrB.getOperand(1), C1Val),
2555 0);
2556 return true;
2557 }
2558 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2559 Base = Addr.getOperand(1);
2560 return true;
2561 } else {
2562 UnwrapShl(Addr.getOperand(1), Index, Scale);
2563 Base = Addr.getOperand(0);
2564 return true;
2565 }
2566 } else if (UnwrapShl(Addr, Index, Scale)) {
2567 EVT VT = Addr.getValueType();
2568 Base = CurDAG->getRegister(RISCV::X0, VT);
2569 return true;
2570 }
2571
2572 return false;
2573}
2574
2575bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2576 SDValue &Offset, bool IsINX) {
2577 if (SelectAddrFrameIndex(Addr, Base, Offset))
2578 return true;
2579
2580 SDLoc DL(Addr);
2581 MVT VT = Addr.getSimpleValueType();
2582
2583 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2584 Base = Addr.getOperand(0);
2585 Offset = Addr.getOperand(1);
2586 return true;
2587 }
2588
2589 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2590 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2591 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2592 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2593 Base = Addr.getOperand(0);
2594 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2595 SDValue LoOperand = Base.getOperand(1);
2596 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2597 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2598 // (its low part, really), then we can rely on the alignment of that
2599 // variable to provide a margin of safety before low part can overflow
2600 // the 12 bits of the load/store offset. Check if CVal falls within
2601 // that margin; if so (low part + CVal) can't overflow.
2602 const DataLayout &DL = CurDAG->getDataLayout();
2603 Align Alignment = commonAlignment(
2604 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2605 if (CVal == 0 || Alignment > CVal) {
2606 int64_t CombinedOffset = CVal + GA->getOffset();
2607 Base = Base.getOperand(0);
2608 Offset = CurDAG->getTargetGlobalAddress(
2609 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2610 CombinedOffset, GA->getTargetFlags());
2611 return true;
2612 }
2613 }
2614 }
2615
2616 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2617 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2618 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2619 return true;
2620 }
2621 }
2622
2623 // Handle ADD with large immediates.
2624 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2625 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2626 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2627 "simm12 not already handled?");
2628
2629 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2630 // an ADDI for part of the offset and fold the rest into the load/store.
2631 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
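 // For example, an offset of 3000 becomes ADDI with 2047 plus a load/store
 // offset of 953; both halves fit in simm12.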
2632 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2633 int64_t Adj = CVal < 0 ? -2048 : 2047;
2634 Base = SDValue(
2635 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2636 CurDAG->getTargetConstant(Adj, DL, VT)),
2637 0);
2638 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2639 return true;
2640 }
2641
2642 // For larger immediates, we might be able to save one instruction from
2643 // constant materialization by folding the Lo12 bits of the immediate into
2644 // the address. We should only do this if the ADD is only used by loads and
2645 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2646 // separately with the full materialized immediate creating extra
2647 // instructions.
2648 if (isWorthFoldingAdd(Addr) &&
2649 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2650 Offset)) {
2651 // Insert an ADD instruction with the materialized Hi52 bits.
2652 Base = SDValue(
2653 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2654 0);
2655 return true;
2656 }
2657 }
2658
2659 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2660 return true;
2661
2662 Base = Addr;
2663 Offset = CurDAG->getTargetConstant(0, DL, VT);
2664 return true;
2665}
2666
2667/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2668/// Offset should be all zeros.
2669bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2670 SDValue &Offset) {
2671 if (SelectAddrFrameIndex(Addr, Base, Offset))
2672 return true;
2673
2674 SDLoc DL(Addr);
2675 MVT VT = Addr.getSimpleValueType();
2676
2677 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2678 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2679 if (isInt<12>(CVal)) {
2680 Base = Addr.getOperand(0);
2681
2682 // Early-out if not a valid offset.
2683 if ((CVal & 0b11111) != 0) {
2684 Base = Addr;
2685 Offset = CurDAG->getTargetConstant(0, DL, VT);
2686 return true;
2687 }
2688
2689 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2690 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2691 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2692 return true;
2693 }
2694 }
2695
2696 // Handle ADD with large immediates.
2697 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2698 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2699 assert(!isInt<12>(CVal) &&
2700 "simm12 not already handled?");
2701
2702 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2703 // one instruction by folding an adjustment of -2048 or 2016 into the address.
2704 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2705 int64_t Adj = CVal < 0 ? -2048 : 2016;
2706 int64_t AdjustedOffset = CVal - Adj;
2707 Base = SDValue(CurDAG->getMachineNode(
2708 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2709 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2710 0);
2711 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2712 return true;
2713 }
2714
2715 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2716 Offset, true)) {
2717 // Insert an ADD instruction with the materialized Hi52 bits.
2718 Base = SDValue(
2719 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2720 0);
2721 return true;
2722 }
2723 }
2724
2725 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2726 return true;
2727
2728 Base = Addr;
2729 Offset = CurDAG->getTargetConstant(0, DL, VT);
2730 return true;
2731}
2732
2733bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2734 SDValue &Offset) {
2735 if (Addr.getOpcode() != ISD::ADD)
2736 return false;
2737
2738 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2739 return false;
2740
2741 Base = Addr.getOperand(1);
2742 Offset = Addr.getOperand(0);
2743 return true;
2744}
2745
2746bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2747 SDValue &ShAmt) {
2748 ShAmt = N;
2749
2750 // Peek through zext.
2751 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2752 ShAmt = ShAmt.getOperand(0);
2753
2754 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2755 // amount. If there is an AND on the shift amount, we can bypass it if it
2756 // doesn't affect any of those bits.
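 // For example, (srl X, (and Y, 63)) on RV64 can shift by Y directly, since
 // the AND keeps every bit the 6-bit shift amount reads.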
2757 if (ShAmt.getOpcode() == ISD::AND &&
2758 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2759 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2760
2761 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2762 // mask that covers the bits needed to represent all shift amounts.
2763 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2764 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2765
2766 if (ShMask.isSubsetOf(AndMask)) {
2767 ShAmt = ShAmt.getOperand(0);
2768 } else {
2769 // SimplifyDemandedBits may have optimized the mask so try restoring any
2770 // bits that are known zero.
2771 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2772 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2773 return true;
2774 ShAmt = ShAmt.getOperand(0);
2775 }
2776 }
2777
2778 if (ShAmt.getOpcode() == ISD::ADD &&
2779 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2780 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2781 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2782 // to avoid the ADD.
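 // For example, a shift by (Y + 64) on RV64 shifts by the same amount as Y.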
2783 if (Imm != 0 && Imm % ShiftWidth == 0) {
2784 ShAmt = ShAmt.getOperand(0);
2785 return true;
2786 }
2787 } else if (ShAmt.getOpcode() == ISD::SUB &&
2788 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2789 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2790 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2791 // generate a NEG instead of a SUB of a constant.
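 // For example, a shift by (64 - Y) on RV64 equals a shift by -Y modulo 64.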
2792 if (Imm != 0 && Imm % ShiftWidth == 0) {
2793 SDLoc DL(ShAmt);
2794 EVT VT = ShAmt.getValueType();
2795 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2796 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2797 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2798 ShAmt.getOperand(1));
2799 ShAmt = SDValue(Neg, 0);
2800 return true;
2801 }
2802 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2803 // to generate a NOT instead of a SUB of a constant.
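 // For example, a shift by (63 - Y) on RV64 equals a shift by ~Y, since
 // ~Y == 63 - Y (mod 64).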
2804 if (Imm % ShiftWidth == ShiftWidth - 1) {
2805 SDLoc DL(ShAmt);
2806 EVT VT = ShAmt.getValueType();
2807 MachineSDNode *Not =
2808 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2809 CurDAG->getTargetConstant(-1, DL, VT));
2810 ShAmt = SDValue(Not, 0);
2811 return true;
2812 }
2813 }
2814
2815 return true;
2816}
2817
2818/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2819/// check for equality with 0. This function emits instructions that convert the
2820/// seteq/setne into something that can be compared with 0.
2821/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2822/// ISD::SETNE).
2823bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2824 SDValue &Val) {
2825 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2826 "Unexpected condition code!");
2827
2828 // We're looking for a setcc.
2829 if (N->getOpcode() != ISD::SETCC)
2830 return false;
2831
2832 // Must be an equality comparison.
2833 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2834 if (CCVal != ExpectedCCVal)
2835 return false;
2836
2837 SDValue LHS = N->getOperand(0);
2838 SDValue RHS = N->getOperand(1);
2839
2840 if (!LHS.getValueType().isScalarInteger())
2841 return false;
2842
2843 // If the RHS is 0, we don't need any extra instructions; return the LHS.
2844 if (isNullConstant(RHS)) {
2845 Val = LHS;
2846 return true;
2847 }
2848
2849 SDLoc DL(N);
2850
2851 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2852 int64_t CVal = C->getSExtValue();
2853 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2854 // non-zero otherwise.
2855 if (CVal == -2048) {
2856 Val =
2857 SDValue(CurDAG->getMachineNode(
2858 RISCV::XORI, DL, N->getValueType(0), LHS,
2859 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2860 0);
2861 return true;
2862 }
2863 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2864 // LHS is equal to the RHS and non-zero otherwise.
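 // For example, comparing the LHS with 100 becomes ADDI LHS, -100, which is
 // zero exactly when LHS == 100.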
2865 if (isInt<12>(CVal) || CVal == 2048) {
2866 Val =
2867 SDValue(CurDAG->getMachineNode(
2868 RISCV::ADDI, DL, N->getValueType(0), LHS,
2869 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2870 0);
2871 return true;
2872 }
2873 }
2874
2875 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2876 // equal and a non-zero value if they aren't.
2877 Val = SDValue(
2878 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2879 return true;
2880}
2881
2882bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2883 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2884 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2885 Val = N.getOperand(0);
2886 return true;
2887 }
2888
2889 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2890 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2891 return N;
2892
2893 SDValue N0 = N.getOperand(0);
2894 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2895 N.getConstantOperandVal(1) == ShiftAmt &&
2896 N0.getConstantOperandVal(1) == ShiftAmt)
2897 return N0.getOperand(0);
2898
2899 return N;
2900 };
2901
2902 MVT VT = N.getSimpleValueType();
2903 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2904 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2905 return true;
2906 }
2907
2908 return false;
2909}
2910
2911bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2912 if (N.getOpcode() == ISD::AND) {
2913 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2914 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2915 Val = N.getOperand(0);
2916 return true;
2917 }
2918 }
2919 MVT VT = N.getSimpleValueType();
2920 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2921 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2922 Val = N;
2923 return true;
2924 }
2925
2926 return false;
2927}
2928
2929/// Look for various patterns that can be done with a SHL that can be folded
2930/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2931/// SHXADD we are trying to match.
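/// (SH1ADD/SH2ADD/SH3ADD from Zba compute (rs1 << ShAmt) + rs2.)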
2932bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2933 SDValue &Val) {
2934 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2935 SDValue N0 = N.getOperand(0);
2936
2937 bool LeftShift = N0.getOpcode() == ISD::SHL;
2938 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2939 isa<ConstantSDNode>(N0.getOperand(1))) {
2940 uint64_t Mask = N.getConstantOperandVal(1);
2941 unsigned C2 = N0.getConstantOperandVal(1);
2942
2943 unsigned XLen = Subtarget->getXLen();
2944 if (LeftShift)
2945 Mask &= maskTrailingZeros<uint64_t>(C2);
2946 else
2947 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2948
2949 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2950 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2951 // followed by a SHXADD with c3 for the X amount.
2952 if (isShiftedMask_64(Mask)) {
2953 unsigned Leading = XLen - llvm::bit_width(Mask);
2954 unsigned Trailing = llvm::countr_zero(Mask);
2955 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2956 SDLoc DL(N);
2957 EVT VT = N.getValueType();
2958 Val = SDValue(CurDAG->getMachineNode(
2959 RISCV::SRLI, DL, VT, N0.getOperand(0),
2960 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2961 0);
2962 return true;
2963 }
2964 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2965 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2966 // followed by a SHXADD using c3 for the X amount.
2967 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2968 SDLoc DL(N);
2969 EVT VT = N.getValueType();
2970 Val = SDValue(
2971 CurDAG->getMachineNode(
2972 RISCV::SRLI, DL, VT, N0.getOperand(0),
2973 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2974 0);
2975 return true;
2976 }
2977 }
2978 }
2979 }
2980
2981 bool LeftShift = N.getOpcode() == ISD::SHL;
2982 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2983 isa<ConstantSDNode>(N.getOperand(1))) {
2984 SDValue N0 = N.getOperand(0);
2985 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2986 isa<ConstantSDNode>(N0.getOperand(1))) {
2987 uint64_t Mask = N0.getConstantOperandVal(1);
2988 if (isShiftedMask_64(Mask)) {
2989 unsigned C1 = N.getConstantOperandVal(1);
2990 unsigned XLen = Subtarget->getXLen();
2991 unsigned Leading = XLen - llvm::bit_width(Mask);
2992 unsigned Trailing = llvm::countr_zero(Mask);
2993 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2994 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2995 if (LeftShift && Leading == 32 && Trailing > 0 &&
2996 (Trailing + C1) == ShAmt) {
2997 SDLoc DL(N);
2998 EVT VT = N.getValueType();
2999 Val = SDValue(CurDAG->getMachineNode(
3000 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3001 CurDAG->getTargetConstant(Trailing, DL, VT)),
3002 0);
3003 return true;
3004 }
3005 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3006 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3007 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3008 (Trailing - C1) == ShAmt) {
3009 SDLoc DL(N);
3010 EVT VT = N.getValueType();
3011 CurDAG->getMachineNode(
3012 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3013 CurDAG->getTargetConstant(Trailing, DL, VT)),
3014 0);
3015 return true;
3016 }
3017 }
3018 }
3019 }
3020
3021 return false;
3022}
3023
3024/// Look for various patterns that can be done with a SHL that can be folded
3025/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3026/// SHXADD_UW we are trying to match.
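/// (SH1ADD.UW/SH2ADD.UW/SH3ADD.UW zero-extend rs1 from 32 bits before the
/// shift and add.)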
3027bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3028 SDValue &Val) {
3029 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3030 N.hasOneUse()) {
3031 SDValue N0 = N.getOperand(0);
3032 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3033 N0.hasOneUse()) {
3034 uint64_t Mask = N.getConstantOperandVal(1);
3035 unsigned C2 = N0.getConstantOperandVal(1);
3036
3037 Mask &= maskTrailingZeros<uint64_t>(C2);
3038
3039 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3040 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3041 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3042 if (isShiftedMask_64(Mask)) {
3043 unsigned Leading = llvm::countl_zero(Mask);
3044 unsigned Trailing = llvm::countr_zero(Mask);
3045 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3046 SDLoc DL(N);
3047 EVT VT = N.getValueType();
3048 Val = SDValue(CurDAG->getMachineNode(
3049 RISCV::SLLI, DL, VT, N0.getOperand(0),
3050 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3051 0);
3052 return true;
3053 }
3054 }
3055 }
3056 }
3057
3058 return false;
3059}
3060
3061static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3062 unsigned Bits,
3063 const TargetInstrInfo *TII) {
3064 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3065
3066 if (!MCOpcode)
3067 return false;
3068
3069 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3070 const uint64_t TSFlags = MCID.TSFlags;
3071 if (!RISCVII::hasSEWOp(TSFlags))
3072 return false;
3073 assert(RISCVII::hasVLOp(TSFlags));
3074
3075 bool HasGlueOp = User->getGluedNode() != nullptr;
3076 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3077 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3078 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3079 unsigned VLIdx =
3080 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3081 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3082
3083 if (UserOpNo == VLIdx)
3084 return false;
3085
3086 auto NumDemandedBits =
3087 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3088 return NumDemandedBits && Bits >= *NumDemandedBits;
3089}
3090
3091// Return true if all users of this SDNode* only consume the lower \p Bits.
3092// This can be used to form W instructions for add/sub/mul/shl even when the
3093// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3094// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3095// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3096// the add/sub/mul/shl to become non-W instructions. By checking the users we
3097// may be able to use a W instruction and CSE with the other instruction if
3098// this has happened. We could try to detect that the CSE opportunity exists
3099// before doing this, but that would be more complicated.
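// For example, an ADD whose only user is an SW (which stores just the low
// 32 bits of its value operand) can be selected as ADDW without changing the
// stored result.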
3100bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3101 const unsigned Depth) const {
3102 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3103 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3104 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3105 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3106 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3107 isa<ConstantSDNode>(Node) || Depth != 0) &&
3108 "Unexpected opcode");
3109
3110 if (Depth >= SelectionDAG::MaxRecursionDepth)
3111 return false;
3112
3113 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3114 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3115 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3116 return false;
3117
3118 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3119 SDNode *User = *UI;
3120 // Users of this node should have already been instruction selected
3121 if (!User->isMachineOpcode())
3122 return false;
3123
3124 // TODO: Add more opcodes?
3125 switch (User->getMachineOpcode()) {
3126 default:
3127 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3128 break;
3129 return false;
3130 case RISCV::ADDW:
3131 case RISCV::ADDIW:
3132 case RISCV::SUBW:
3133 case RISCV::MULW:
3134 case RISCV::SLLW:
3135 case RISCV::SLLIW:
3136 case RISCV::SRAW:
3137 case RISCV::SRAIW:
3138 case RISCV::SRLW:
3139 case RISCV::SRLIW:
3140 case RISCV::DIVW:
3141 case RISCV::DIVUW:
3142 case RISCV::REMW:
3143 case RISCV::REMUW:
3144 case RISCV::ROLW:
3145 case RISCV::RORW:
3146 case RISCV::RORIW:
3147 case RISCV::CLZW:
3148 case RISCV::CTZW:
3149 case RISCV::CPOPW:
3150 case RISCV::SLLI_UW:
3151 case RISCV::FMV_W_X:
3152 case RISCV::FCVT_H_W:
3153 case RISCV::FCVT_H_WU:
3154 case RISCV::FCVT_S_W:
3155 case RISCV::FCVT_S_WU:
3156 case RISCV::FCVT_D_W:
3157 case RISCV::FCVT_D_WU:
3158 case RISCV::TH_REVW:
3159 case RISCV::TH_SRRIW:
3160 if (Bits < 32)
3161 return false;
3162 break;
3163 case RISCV::SLL:
3164 case RISCV::SRA:
3165 case RISCV::SRL:
3166 case RISCV::ROL:
3167 case RISCV::ROR:
3168 case RISCV::BSET:
3169 case RISCV::BCLR:
3170 case RISCV::BINV:
3171 // Shift amount operands only use log2(Xlen) bits.
3172 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
3173 return false;
3174 break;
3175 case RISCV::SLLI:
3176 // SLLI only uses the lower (XLen - ShAmt) bits.
3177 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
3178 return false;
3179 break;
3180 case RISCV::ANDI:
3181 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3182 break;
3183 goto RecCheck;
3184 case RISCV::ORI: {
3185 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3186 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3187 break;
3188 [[fallthrough]];
3189 }
3190 case RISCV::AND:
3191 case RISCV::OR:
3192 case RISCV::XOR:
3193 case RISCV::XORI:
3194 case RISCV::ANDN:
3195 case RISCV::ORN:
3196 case RISCV::XNOR:
3197 case RISCV::SH1ADD:
3198 case RISCV::SH2ADD:
3199 case RISCV::SH3ADD:
3200 RecCheck:
3201 if (hasAllNBitUsers(User, Bits, Depth + 1))
3202 break;
3203 return false;
3204 case RISCV::SRLI: {
3205 unsigned ShAmt = User->getConstantOperandVal(1);
3206 // If we are shifting right by less than Bits, and users don't demand any
3207 // bits that were shifted into [Bits-1:0], then we can consider this as an
3208 // N-Bit user.
3209 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3210 break;
3211 return false;
3212 }
3213 case RISCV::SEXT_B:
3214 case RISCV::PACKH:
3215 if (Bits < 8)
3216 return false;
3217 break;
3218 case RISCV::SEXT_H:
3219 case RISCV::FMV_H_X:
3220 case RISCV::ZEXT_H_RV32:
3221 case RISCV::ZEXT_H_RV64:
3222 case RISCV::PACKW:
3223 if (Bits < 16)
3224 return false;
3225 break;
3226 case RISCV::PACK:
3227 if (Bits < (Subtarget->getXLen() / 2))
3228 return false;
3229 break;
3230 case RISCV::ADD_UW:
3231 case RISCV::SH1ADD_UW:
3232 case RISCV::SH2ADD_UW:
3233 case RISCV::SH3ADD_UW:
3234 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3235 // 32 bits.
3236 if (UI.getOperandNo() != 0 || Bits < 32)
3237 return false;
3238 break;
3239 case RISCV::SB:
3240 if (UI.getOperandNo() != 0 || Bits < 8)
3241 return false;
3242 break;
3243 case RISCV::SH:
3244 if (UI.getOperandNo() != 0 || Bits < 16)
3245 return false;
3246 break;
3247 case RISCV::SW:
3248 if (UI.getOperandNo() != 0 || Bits < 32)
3249 return false;
3250 break;
3251 }
3252 }
3253
3254 return true;
3255}
3256
3257// Select a constant that can be represented as (sign_extend(imm5) << imm2).
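// For example, 48 is encodable as simm5 = 12 with shift = 2 (12 << 2 == 48).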
3259 SDValue &Shl2) {
3260 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3261 int64_t Offset = C->getSExtValue();
3262 int64_t Shift;
3263 for (Shift = 0; Shift < 4; Shift++)
3264 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3265 break;
3266
3267 // Constant cannot be encoded.
3268 if (Shift == 4)
3269 return false;
3270
3271 EVT Ty = N->getValueType(0);
3272 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3273 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3274 return true;
3275 }
3276
3277 return false;
3278}
3279
3280// Select VL as a 5 bit immediate or a value that will become a register. This
3281// allows us to choose between VSETIVLI or VSETVLI later.
3282bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3283 auto *C = dyn_cast<ConstantSDNode>(N);
3284 if (C && isUInt<5>(C->getZExtValue())) {
3285 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3286 N->getValueType(0));
3287 } else if (C && C->isAllOnes()) {
3288 // Treat all ones as VLMax.
3289 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3290 N->getValueType(0));
3291 } else if (isa<RegisterSDNode>(N) &&
3292 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3293 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3294 // as the register class. Convert X0 to a special immediate to pass the
3295 // MachineVerifier. This is recognized specially by the vsetvli insertion
3296 // pass.
3297 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3298 N->getValueType(0));
3299 } else {
3300 VL = N;
3301 }
3302
3303 return true;
3304}
3305
3307 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3308 if (!N.getOperand(0).isUndef())
3309 return SDValue();
3310 N = N.getOperand(1);
3311 }
3312 SDValue Splat = N;
3313 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3314 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3315 !Splat.getOperand(0).isUndef())
3316 return SDValue();
3317 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3318 return Splat;
3319}
3320
3323 if (!Splat)
3324 return false;
3325
3326 SplatVal = Splat.getOperand(1);
3327 return true;
3328}
3329
3330static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3331 SelectionDAG &DAG,
3332 const RISCVSubtarget &Subtarget,
3333 std::function<bool(int64_t)> ValidateImm) {
3335 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3336 return false;
3337
3338 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3339 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3340 "Unexpected splat operand type");
3341
3342 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3343 // type is wider than the resulting vector element type: an implicit
3344 // truncation first takes place. Therefore, perform a manual
3345 // truncation/sign-extension in order to ignore any truncated bits and catch
3346 // any zero-extended immediate.
3347 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3348 // sign-extending to (XLenVT -1).
3349 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3350
3351 int64_t SplatImm = SplatConst.getSExtValue();
3352
3353 if (!ValidateImm(SplatImm))
3354 return false;
3355
3356 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3357 return true;
3358}
3359
3361 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3362 [](int64_t Imm) { return isInt<5>(Imm); });
3363}
3364
3366 return selectVSplatImmHelper(
3367 N, SplatVal, *CurDAG, *Subtarget,
3368 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3369}
3370
3372 SDValue &SplatVal) {
3373 return selectVSplatImmHelper(
3374 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3375 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3376 });
3377}
3378
3379bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3380 SDValue &SplatVal) {
3381 return selectVSplatImmHelper(
3382 N, SplatVal, *CurDAG, *Subtarget,
3383 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3384}
3385
3386bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3387 auto IsExtOrTrunc = [](SDValue N) {
3388 switch (N->getOpcode()) {
3389 case ISD::SIGN_EXTEND:
3390 case ISD::ZERO_EXTEND:
3391 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3392 // inactive elements will be undef.
3393 case RISCVISD::TRUNCATE_VECTOR_VL:
3394 case RISCVISD::VSEXT_VL:
3395 case RISCVISD::VZEXT_VL:
3396 return true;
3397 default:
3398 return false;
3399 }
3400 };
3401
3402 // We can have multiple nested nodes, so unravel them all if needed.
3403 while (IsExtOrTrunc(N)) {
3404 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3405 return false;
3406 N = N->getOperand(0);
3407 }
3408
3409 return selectVSplat(N, SplatVal);
3410}
3411
3412bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3413 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3414 if (!CFP)
3415 return false;
3416 const APFloat &APF = CFP->getValueAPF();
3417 // td can handle +0.0 already.
3418 if (APF.isPosZero())
3419 return false;
3420
3421 MVT VT = CFP->getSimpleValueType(0);
3422
3423 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3424 // the returned pair is true) we still prefer FLI + FNEG over immediate
3425 // materialization as the latter might generate a longer instruction sequence.
3426 if (static_cast<const RISCVTargetLowering *>(TLI)
3427 ->getLegalZfaFPImm(APF, VT)
3428 .first >= 0)
3429 return false;
3430
3431 MVT XLenVT = Subtarget->getXLenVT();
3432 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3433 assert(APF.isNegZero() && "Unexpected constant.");
3434 return false;
3435 }
3436 SDLoc DL(N);
3437 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3438 *Subtarget);
3439 return true;
3440}
3441
3442bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3443 SDValue &Imm) {
3444 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
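// Sign-extend the constant from the element width first so that, e.g., a
// zero-extended i8 255 is treated as -1 before the simm5 range check.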
3445 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3446
3447 if (!isInt<5>(ImmVal))
3448 return false;
3449
3450 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3451 return true;
3452 }
3453
3454 return false;
3455}
3456
3457// Try to remove sext.w if the input is a W instruction or can be made into
3458// a W instruction cheaply.
3459bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3460 // Look for the sext.w pattern, addiw rd, rs1, 0.
3461 if (N->getMachineOpcode() != RISCV::ADDIW ||
3462 !isNullConstant(N->getOperand(1)))
3463 return false;
3464
3465 SDValue N0 = N->getOperand(0);
3466 if (!N0.isMachineOpcode())
3467 return false;
3468
3469 switch (N0.getMachineOpcode()) {
3470 default:
3471 break;
3472 case RISCV::ADD:
3473 case RISCV::ADDI:
3474 case RISCV::SUB:
3475 case RISCV::MUL:
3476 case RISCV::SLLI: {
3477 // Convert sext.w+add/sub/mul to their W instructions. This will create
3478 // a new independent instruction. This improves latency.
3479 unsigned Opc;
3480 switch (N0.getMachineOpcode()) {
3481 default:
3482 llvm_unreachable("Unexpected opcode!");
3483 case RISCV::ADD: Opc = RISCV::ADDW; break;
3484 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3485 case RISCV::SUB: Opc = RISCV::SUBW; break;
3486 case RISCV::MUL: Opc = RISCV::MULW; break;
3487 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3488 }
3489
3490 SDValue N00 = N0.getOperand(0);
3491 SDValue N01 = N0.getOperand(1);
3492
3493 // Shift amount needs to be uimm5.
3494 if (N0.getMachineOpcode() == RISCV::SLLI &&
3495 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3496 break;
3497
3498 SDNode *Result =
3499 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3500 N00, N01);
3501 ReplaceUses(N, Result);
3502 return true;
3503 }
3504 case RISCV::ADDW:
3505 case RISCV::ADDIW:
3506 case RISCV::SUBW:
3507 case RISCV::MULW:
3508 case RISCV::SLLIW:
3509 case RISCV::PACKW:
3510 case RISCV::TH_MULAW:
3511 case RISCV::TH_MULAH:
3512 case RISCV::TH_MULSW:
3513 case RISCV::TH_MULSH:
3514 if (N0.getValueType() == MVT::i32)
3515 break;
3516
3517 // Result is already sign extended; just remove the sext.w.
3518 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3519 ReplaceUses(N, N0.getNode());
3520 return true;
3521 }
3522
3523 return false;
3524}
3525
3526static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3527 // Check that we're using V0 as a mask register.
3528 if (!isa<RegisterSDNode>(MaskOp) ||
3529 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3530 return false;
3531
3532 // The glued user defines V0.
3533 const auto *Glued = GlueOp.getNode();
3534
3535 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3536 return false;
3537
3538 // Check that we're defining V0 as a mask register.
3539 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3540 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3541 return false;
3542
3543 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3544 SDValue MaskSetter = Glued->getOperand(2);
3545
3546 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3547 // from an extract_subvector or insert_subvector.
3548 if (MaskSetter->isMachineOpcode() &&
3549 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3550 MaskSetter = MaskSetter->getOperand(0);
3551
3552 const auto IsVMSet = [](unsigned Opc) {
3553 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3554 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3555 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3556 Opc == RISCV::PseudoVMSET_M_B8;
3557 };
3558
3559 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3560 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3561 // assume that it's all-ones? Same applies to its VL.
3562 return MaskSetter->isMachineOpcode() &&
3563 IsVMSet(MaskSetter.getMachineOpcode());
3564}
3565
3566 // Return true if we can prove that the mask operand of N is an all-ones mask.
3567static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3568 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3569 N->getOperand(N->getNumOperands() - 1));
3570}
3571
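// Return true if V is an IMPLICIT_DEF, or a REG_SEQUENCE whose register
// operands are all themselves implicit defs (i.e. a fully undefined tuple).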
3572static bool isImplicitDef(SDValue V) {
3573 if (!V.isMachineOpcode())
3574 return false;
3575 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3576 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3577 if (!isImplicitDef(V.getOperand(I)))
3578 return false;
3579 return true;
3580 }
3581 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3582}
3583
3584// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3585// corresponding "unmasked" pseudo versions. The mask we're interested in will
3586// take the form of a V0 physical register operand, with a glued
3587// register-setting instruction.
3588bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3589 const RISCV::RISCVMaskedPseudoInfo *I =
3590 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3591 if (!I)
3592 return false;
3593
3594 unsigned MaskOpIdx = I->MaskOpIdx;
3595 if (!usesAllOnesMask(N, MaskOpIdx))
3596 return false;
3597
3598 // There are two classes of pseudos in the table - compares and
3599 // everything else. See the comment on RISCVMaskedPseudo for details.
3600 const unsigned Opc = I->UnmaskedPseudo;
3601 const MCInstrDesc &MCID = TII->get(Opc);
3602 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3603#ifndef NDEBUG
3604 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3607 "Masked and unmasked pseudos are inconsistent");
3608 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3609 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3610#endif
3611
3612 SmallVector<SDValue, 8> Ops;
3613 // Skip the merge operand at index 0 if !UseTUPseudo.
3614 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3615 // Skip the mask, and the Glue.
3616 SDValue Op = N->getOperand(I);
3617 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3618 continue;
3619 Ops.push_back(Op);
3620 }
3621
3622 // Transitively apply any node glued to our new node.
3623 const auto *Glued = N->getGluedNode();
3624 if (auto *TGlued = Glued->getGluedNode())
3625 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3626
3627 MachineSDNode *Result =
3628 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3629
3630 if (!N->memoperands_empty())
3631 CurDAG->setNodeMemRefs(Result, N->memoperands());
3632
3633 Result->setFlags(N->getFlags());
3634 ReplaceUses(N, Result);
3635
3636 return true;
3637}
3638
3639static bool IsVMerge(SDNode *N) {
3640 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3641}
3642
3643static bool IsVMv(SDNode *N) {
3644 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3645}
3646
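// Return the PseudoVMSET variant that produces an all-ones mask sized for the
// given LMUL; used below to synthesize a mask when folding a vmv.v.v as if it
// were a vmerge with an all-ones mask.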
3647static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3648 switch (LMUL) {
3649 case RISCVII::LMUL_F8:
3650 return RISCV::PseudoVMSET_M_B1;
3651 case RISCVII::LMUL_F4:
3652 return RISCV::PseudoVMSET_M_B2;
3653 case RISCVII::LMUL_F2:
3654 return RISCV::PseudoVMSET_M_B4;
3655 case RISCVII::LMUL_1:
3656 return RISCV::PseudoVMSET_M_B8;
3657 case RISCVII::LMUL_2:
3658 return RISCV::PseudoVMSET_M_B16;
3659 case RISCVII::LMUL_4:
3660 return RISCV::PseudoVMSET_M_B32;
3661 case RISCVII::LMUL_8:
3662 return RISCV::PseudoVMSET_M_B64;
3664 llvm_unreachable("Unexpected LMUL");
3665 }
3666 llvm_unreachable("Unknown VLMUL enum");
3667}
3668
3669// Try to fold away VMERGE_VVM instructions into their true operands:
3670//
3671// %true = PseudoVADD_VV ...
3672// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3673// ->
3674// %x = PseudoVADD_VV_MASK %false, ..., %mask
3675//
3676// We can only fold if vmerge's merge operand, vmerge's false operand and
3677// %true's merge operand (if it has one) are the same. This is because we have
3678// to consolidate them into one merge operand in the result.
3679//
3680// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3681// mask is all ones.
3682//
3683// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3684// VMERGE_VVM with an all ones mask.
3685//
3686// The resulting VL is the minimum of the two VLs.
3687//
3688// The resulting policy is the effective policy the vmerge would have had,
3689// i.e. whether or not its merge operand was implicit-def.
3690bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3691 SDValue Merge, False, True, VL, Mask, Glue;
3692 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3693 if (IsVMv(N)) {
3694 Merge = N->getOperand(0);
3695 False = N->getOperand(0);
3696 True = N->getOperand(1);
3697 VL = N->getOperand(2);
3698 // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3699 // mask later below.
3700 } else {
3701 assert(IsVMerge(N));
3702 Merge = N->getOperand(0);
3703 False = N->getOperand(1);
3704 True = N->getOperand(2);
3705 Mask = N->getOperand(3);
3706 VL = N->getOperand(4);
3707 // We always have a glue node for the mask at v0.
3708 Glue = N->getOperand(N->getNumOperands() - 1);
3709 }
3710 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3711 assert(!Glue || Glue.getValueType() == MVT::Glue);
3712
3713 // We require that either the merge and false operands are the same, or
3714 // that the merge operand is undefined.
3715 if (Merge != False && !isImplicitDef(Merge))
3716 return false;
3717
3718 assert(True.getResNo() == 0 &&
3719 "Expect True is the first output of an instruction.");
3720
3721 // N must be the only user of True.
3722 if (!True.hasOneUse())
3723 return false;
3724
3725 if (!True.isMachineOpcode())
3726 return false;
3727
3728 unsigned TrueOpc = True.getMachineOpcode();
3729 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3730 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3731 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3732
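// Look up the masked form of True: either True is an unmasked pseudo with a
// masked counterpart in the intrinsic table, or True is already masked and
// has an entry in the masked-pseudo table.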
3733 bool IsMasked = false;
3734 const RISCV::RISCVMaskedPseudoInfo *Info =
3735 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3736 if (!Info && HasTiedDest) {
3737 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3738 IsMasked = true;
3739 }
3740
3741 if (!Info)
3742 return false;
3743
3744 // When the mask is not all-ones, this transformation is illegal for
3745 // operations whose results are affected by the mask, such as viota.m.
3746 if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
3747 return false;
3748
3749 // If True has a merge operand then it needs to be the same as vmerge's False,
3750 // since False will be used for the result's merge operand.
3751 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3752 // The vmerge instruction must be TU.
3753 // FIXME: This could be relaxed, but we need to handle the policy for the
3754 // resulting op correctly.
3755 if (isImplicitDef(Merge))
3756 return false;
3757 SDValue MergeOpTrue = True->getOperand(0);
3758 if (False != MergeOpTrue)
3759 return false;
3760 }
3761
3762 // If True is masked then the vmerge must have an all 1s mask, since we're
3763 // going to keep the mask from True.
3764 if (IsMasked) {
3765 assert(HasTiedDest && "Expected tied dest");
3766 // The vmerge instruction must be TU.
3767 if (isImplicitDef(Merge))
3768 return false;
3769 // FIXME: Support mask agnostic True instruction which would have an
3770 // undef merge operand.
3771 if (Mask && !usesAllOnesMask(Mask, Glue))
3772 return false;
3773 }
3774
3775 // Skip if True has side effect.
3776 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3777 return false;
3778
3779 // The last operand of a masked instruction may be glued.
3780 bool HasGlueOp = True->getGluedNode() != nullptr;
3781
3782 // The chain operand may exist either before the glued operands or in the last
3783 // position.
3784 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3785 bool HasChainOp =
3786 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3787
3788 if (HasChainOp) {
3789 // Avoid creating cycles in the DAG. We must ensure that none of the other
3790 // operands depend on True through its Chain.
3791 SmallVector<const SDNode *, 4> LoopWorklist;
3792 SmallPtrSet<const SDNode *, 16> Visited;
3793 LoopWorklist.push_back(False.getNode());
3794 if (Mask)
3795 LoopWorklist.push_back(Mask.getNode());
3796 LoopWorklist.push_back(VL.getNode());
3797 if (Glue)
3798 LoopWorklist.push_back(Glue.getNode());
3799 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3800 return false;
3801 }
3802
3803 // The vector policy operand may be present for masked intrinsics
3804 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
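// Pseudo operands end with (..., vl, sew[, policy][, chain][, glue]), so the
// VL operand sits that many slots before the end of the operand list.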
3805 unsigned TrueVLIndex =
3806 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3807 SDValue TrueVL = True.getOperand(TrueVLIndex);
3808 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3809
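// Statically pick the smaller of two VL operands when possible, treating the
// all-ones sentinel as VLMAX; returns an empty SDValue if the ordering cannot
// be determined at compile time.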
3810 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3811 if (LHS == RHS)
3812 return LHS;
3813 if (isAllOnesConstant(LHS))
3814 return RHS;
3815 if (isAllOnesConstant(RHS))
3816 return LHS;
3817 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3818 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3819 if (!CLHS || !CRHS)
3820 return SDValue();
3821 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3822 };
3823
3824 // Because N and True must have the same merge operand (or True's operand is
3825 // implicit_def), the "effective" body is the minimum of their VLs.
3826 SDValue OrigVL = VL;
3827 VL = GetMinVL(TrueVL, VL);
3828 if (!VL)
3829 return false;
3830
3831 // If we end up changing the VL or mask of True, then we need to make sure it
3832 // doesn't raise any observable fp exceptions, since changing the active
3833 // elements will affect how fflags is set.
3834 if (TrueVL != VL || !IsMasked)
3835 if (mayRaiseFPException(True.getNode()) &&
3836 !True->getFlags().hasNoFPExcept())
3837 return false;
3838
3839 SDLoc DL(N);
3840
3841 // From the preconditions we checked above, we know the mask and thus glue
3842 // for the result node will be taken from True.
3843 if (IsMasked) {
3844 Mask = True->getOperand(Info->MaskOpIdx);
3845 Glue = True->getOperand(True->getNumOperands() - 1);
3846 assert(Glue.getValueType() == MVT::Glue);
3847 }
3848 // If we end up needing the vmerge's mask but the vmerge is actually a
3849 // vmv.v.v (which has no mask), create an all-ones mask to use.
3850 else if (IsVMv(N)) {
3851 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3852 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3853 ElementCount EC = N->getValueType(0).getVectorElementCount();
3854 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3855
3856 SDValue AllOnesMask =
3857 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3858 SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3859 RISCV::V0, AllOnesMask, SDValue());
3860 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3861 Glue = MaskCopy.getValue(1);
3862 }
3863
3864 unsigned MaskedOpc = Info->MaskedPseudo;
3865#ifndef NDEBUG
3866 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3868 "Expected instructions with mask have policy operand.");
3869 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3870 MCOI::TIED_TO) == 0 &&
3871 "Expected instructions with mask have a tied dest.");
3872#endif
3873
3874 // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3875 // operand is undefined.
3876 //
3877 // However, if the VL became smaller than what the vmerge had originally, then
3878 // elements past VL that were previously in the vmerge's body will have moved
3879 // to the tail. In that case we always need to use tail undisturbed to
3880 // preserve them.
3881 bool MergeVLShrunk = VL != OrigVL;
3882 uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3883 ? RISCVII::TAIL_AGNOSTIC
3884 : /*TUMU*/ 0;
3885 SDValue PolicyOp =
3886 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3887
3888
3889 SmallVector<SDValue, 8> Ops;
3890 Ops.push_back(False);
3891
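// Copy True's ordinary source operands, stopping before its mask (if it is
// masked) and any rounding-mode operand; the mask, rounding mode, VL, SEW and
// policy are re-appended below in the order the masked pseudo expects.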
3892 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3893 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3894 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3895 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3896
3897 Ops.push_back(Mask);
3898
3899 // For unmasked "VOp" with rounding mode operand, that is interfaces like
3900 // (..., rm, vl) or (..., rm, vl, policy).
3901 // Its masked version is (..., vm, rm, vl, policy).
3902 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3903 if (HasRoundingMode)
3904 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3905
3906 Ops.append({VL, SEW, PolicyOp});
3907
3908 // The result node should take the chain operand from True.
3909 if (HasChainOp)
3910 Ops.push_back(True.getOperand(TrueChainOpIdx));
3911
3912 // Add the glue for the CopyToReg of mask->v0.
3913 Ops.push_back(Glue);
3914
3915 MachineSDNode *Result =
3916 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3917 Result->setFlags(True->getFlags());
3918
3919 if (!cast<MachineSDNode>(True)->memoperands_empty())
3920 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3921
3922 // Replace vmerge.vvm node by Result.
3923 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3924
3925 // Replace the other values of True, e.g. chain and VL.
3926 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3927 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3928
3929 return true;
3930}
3931
3932bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3933 bool MadeChange = false;
3934 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3935
3936 while (Position != CurDAG->allnodes_begin()) {
3937 SDNode *N = &*--Position;
3938 if (N->use_empty() || !N->isMachineOpcode())
3939 continue;
3940
3941 if (IsVMerge(N) || IsVMv(N))
3942 MadeChange |= performCombineVMergeAndVOps(N);
3943 }
3944 return MadeChange;
3945}
3946
3947/// If our passthru is an implicit_def, use noreg instead. This sidesteps
3948/// issues with MachineCSE not being able to CSE expressions with
3949/// IMPLICIT_DEF operands while preserving the semantic intent. See
3950/// pr64282 for context. Note that this transform is the last one
3951/// performed during ISel DAG-to-DAG.
3952bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3953 bool MadeChange = false;
3954 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3955
3956 while (Position != CurDAG->allnodes_begin()) {
3957 SDNode *N = &*--Position;
3958 if (N->use_empty() || !N->isMachineOpcode())
3959 continue;
3960
3961 const unsigned Opc = N->getMachineOpcode();
3962 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3963 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3964 !isImplicitDef(N->getOperand(0)))
3965 continue;
3966
3967 SmallVector<SDValue> Ops;
3968 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3969 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3970 SDValue Op = N->getOperand(I);
3971 Ops.push_back(Op);
3972 }
3973
3974 MachineSDNode *Result =
3975 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3976 Result->setFlags(N->getFlags());
3977 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3978 ReplaceUses(N, Result);
3979 MadeChange = true;
3980 }
3981 return MadeChange;
3982}
3983
3984
3985// This pass converts a legalized DAG into a RISCV-specific DAG, ready
3986// for instruction scheduling.
3987FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3988 CodeGenOptLevel OptLevel) {
3989 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
3990}
3991
3992char RISCVDAGToDAGISelLegacy::ID = 0;
3993
3994RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
3995 CodeGenOptLevel OptLevel)
3996 : SelectionDAGISelLegacy(
3997 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
3998
3999INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)