RISCVISelDAGToDAG.cpp
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
21#include "llvm/IR/IntrinsicsRISCV.h"
23#include "llvm/Support/Debug.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "riscv-isel"
30#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
31
33 "riscv-use-rematerializable-movimm", cl::Hidden,
34 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
35 "constant materialization"),
36 cl::init(false));
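// For example (hypothetical invocation): with
//   llc -mtriple=riscv64 -riscv-use-rematerializable-movimm ...
// a two-instruction constant such as 0x12345 (LUI+ADDI) is emitted as a single
// PseudoMovImm that the register allocator can rematerialize instead of spilling.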
37
38namespace llvm::RISCV {
39#define GET_RISCVVSSEGTable_IMPL
40#define GET_RISCVVLSEGTable_IMPL
41#define GET_RISCVVLXSEGTable_IMPL
42#define GET_RISCVVSXSEGTable_IMPL
43#define GET_RISCVVLETable_IMPL
44#define GET_RISCVVSETable_IMPL
45#define GET_RISCVVLXTable_IMPL
46#define GET_RISCVVSXTable_IMPL
47#include "RISCVGenSearchableTables.inc"
48} // namespace llvm::RISCV
49
50 void RISCVDAGToDAGISel::PreprocessISelDAG() {
51 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
52
53 bool MadeChange = false;
54 while (Position != CurDAG->allnodes_begin()) {
55 SDNode *N = &*--Position;
56 if (N->use_empty())
57 continue;
58
59 SDValue Result;
60 switch (N->getOpcode()) {
61 case ISD::SPLAT_VECTOR: {
62 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
63 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
64 MVT VT = N->getSimpleValueType(0);
65 unsigned Opc =
66 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
67 SDLoc DL(N);
68 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
69 SDValue Src = N->getOperand(0);
70 if (VT.isInteger())
71 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
72 N->getOperand(0));
73 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
74 break;
75 }
76 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
77 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
78 // load. Done after lowering and combining so that we have a chance to
79 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
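// Conceptually the expansion is: sw Lo, 0(slot); sw Hi, 4(slot);
// vlse64.v vd, (slot), zero. The zero stride makes every element re-read the
// same 64-bit stack value, i.e. a splat.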
80 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
81 MVT VT = N->getSimpleValueType(0);
82 SDValue Passthru = N->getOperand(0);
83 SDValue Lo = N->getOperand(1);
84 SDValue Hi = N->getOperand(2);
85 SDValue VL = N->getOperand(3);
86 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
87 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
88 "Unexpected VTs!");
89 MachineFunction &MF = CurDAG->getMachineFunction();
90 SDLoc DL(N);
91
92 // Create temporary stack for each expanding node.
93 SDValue StackSlot =
94 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
95 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
96 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
97
98 SDValue Chain = CurDAG->getEntryNode();
99 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
100
101 SDValue OffsetSlot =
102 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
103 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
104 Align(8));
105
106 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
107
108 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
109 SDValue IntID =
110 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
111 SDValue Ops[] = {Chain,
112 IntID,
113 Passthru,
114 StackSlot,
115 CurDAG->getRegister(RISCV::X0, MVT::i64),
116 VL};
117
118 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
119 MVT::i64, MPI, Align(8),
120 MachineMemOperand::MOLoad);
121 break;
122 }
123 }
124
125 if (Result) {
126 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
127 LLVM_DEBUG(N->dump(CurDAG));
128 LLVM_DEBUG(dbgs() << "\nNew: ");
129 LLVM_DEBUG(Result->dump(CurDAG));
130 LLVM_DEBUG(dbgs() << "\n");
131
132 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
133 MadeChange = true;
134 }
135 }
136
137 if (MadeChange)
138 CurDAG->RemoveDeadNodes();
139}
140
141 void RISCVDAGToDAGISel::PostprocessISelDAG() {
142 HandleSDNode Dummy(CurDAG->getRoot());
143 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
144
145 bool MadeChange = false;
146 while (Position != CurDAG->allnodes_begin()) {
147 SDNode *N = &*--Position;
148 // Skip dead nodes and any non-machine opcodes.
149 if (N->use_empty() || !N->isMachineOpcode())
150 continue;
151
152 MadeChange |= doPeepholeSExtW(N);
153
154 // FIXME: This is here only because the VMerge transform doesn't
155 // know how to handle masked true inputs. Once that has been moved
156 // to post-ISEL, this can be deleted as well.
157 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
158 }
159
160 CurDAG->setRoot(Dummy.getValue());
161
162 MadeChange |= doPeepholeMergeVVMFold();
163
164 // After we're done with everything else, convert IMPLICIT_DEF
165 // passthru operands to NoRegister. This is required to work around
166 // an optimization deficiency in MachineCSE. This really should
167 // be merged back into each of the patterns (i.e. there's no good
168 // reason not to go directly to NoReg), but is being done this way
169 // to allow easy backporting.
170 MadeChange |= doPeepholeNoRegPassThru();
171
172 if (MadeChange)
173 CurDAG->RemoveDeadNodes();
174}
175
176static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
177 RISCVMatInt::InstSeq &Seq) {
178 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
179 for (const RISCVMatInt::Inst &Inst : Seq) {
180 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
181 SDNode *Result = nullptr;
182 switch (Inst.getOpndKind()) {
183 case RISCVMatInt::Imm:
184 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
185 break;
186 case RISCVMatInt::RegX0:
187 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
188 CurDAG->getRegister(RISCV::X0, VT));
189 break;
190 case RISCVMatInt::RegReg:
191 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
192 break;
193 case RISCVMatInt::RegImm:
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
195 break;
196 }
197
198 // Only the first instruction has X0 as its source.
199 SrcReg = SDValue(Result, 0);
200 }
201
202 return SrcReg;
203}
204
205static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
206 int64_t Imm, const RISCVSubtarget &Subtarget) {
207 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
208
209 // Use a rematerializable pseudo instruction for short sequences if enabled.
210 if (Seq.size() == 2 && UsePseudoMovImm)
211 return SDValue(
212 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
213 CurDAG->getTargetConstant(Imm, DL, VT)),
214 0);
215
216 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
217 // worst an LUI+ADDIW. This will require an extra register, but avoids a
218 // constant pool.
219 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
220 // low and high 32 bits are the same and bits 31 and 63 are set.
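// For example, 0xF0F0F0F0F0F0F0F0: LUI+ADDIW materializes 0xF0F0F0F0
// (sign-extended), and ADD_UW of that value with (SLLI value, 32) adds the
// zero-extended low half back in, reconstructing the full 64-bit constant.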
221 if (Seq.size() > 3) {
222 unsigned ShiftAmt, AddOpc;
223 RISCVMatInt::InstSeq SeqLo =
224 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
225 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
226 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
227
228 SDValue SLLI = SDValue(
229 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
230 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
231 0);
232 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
233 }
234 }
235
236 // Otherwise, use the original sequence.
237 return selectImmSeq(CurDAG, DL, VT, Seq);
238}
239
240 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
241 unsigned NF, RISCVII::VLMUL LMUL) {
242 static const unsigned M1TupleRegClassIDs[] = {
243 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
244 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
245 RISCV::VRN8M1RegClassID};
246 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
247 RISCV::VRN3M2RegClassID,
248 RISCV::VRN4M2RegClassID};
249
250 assert(Regs.size() >= 2 && Regs.size() <= 8);
251
252 unsigned RegClassID;
253 unsigned SubReg0;
254 switch (LMUL) {
255 default:
256 llvm_unreachable("Invalid LMUL.");
257 case RISCVII::VLMUL::LMUL_F8:
258 case RISCVII::VLMUL::LMUL_F4:
259 case RISCVII::VLMUL::LMUL_F2:
260 case RISCVII::VLMUL::LMUL_1:
261 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
262 "Unexpected subreg numbering");
263 SubReg0 = RISCV::sub_vrm1_0;
264 RegClassID = M1TupleRegClassIDs[NF - 2];
265 break;
266 case RISCVII::VLMUL::LMUL_2:
267 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
268 "Unexpected subreg numbering");
269 SubReg0 = RISCV::sub_vrm2_0;
270 RegClassID = M2TupleRegClassIDs[NF - 2];
271 break;
272 case RISCVII::VLMUL::LMUL_4:
273 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
274 "Unexpected subreg numbering");
275 SubReg0 = RISCV::sub_vrm4_0;
276 RegClassID = RISCV::VRN2M4RegClassID;
277 break;
278 }
279
280 SDLoc DL(Regs[0]);
281 SmallVector<SDValue, 8> Ops;
282
283 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
284
285 for (unsigned I = 0; I < Regs.size(); ++I) {
286 Ops.push_back(Regs[I]);
287 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
288 }
289 SDNode *N =
290 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
291 return SDValue(N, 0);
292}
293
294 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
295 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
296 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
297 bool IsLoad, MVT *IndexVT) {
298 SDValue Chain = Node->getOperand(0);
299 SDValue Glue;
300
301 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
302
303 if (IsStridedOrIndexed) {
304 Operands.push_back(Node->getOperand(CurOp++)); // Index.
305 if (IndexVT)
306 *IndexVT = Operands.back()->getSimpleValueType(0);
307 }
308
309 if (IsMasked) {
310 // Mask needs to be copied to V0.
311 SDValue Mask = Node->getOperand(CurOp++);
312 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
313 Glue = Chain.getValue(1);
314 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
315 }
316 SDValue VL;
317 selectVLOp(Node->getOperand(CurOp++), VL);
318 Operands.push_back(VL);
319
320 MVT XLenVT = Subtarget->getXLenVT();
321 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
322 Operands.push_back(SEWOp);
323
324 // At the IR layer, all the masked load intrinsics have policy operands,
325 // none of the others do. All have passthru operands. For our pseudos,
326 // all loads have policy operands.
327 if (IsLoad) {
328 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
329 if (IsMasked)
330 Policy = Node->getConstantOperandVal(CurOp++);
331 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
332 Operands.push_back(PolicyOp);
333 }
334
335 Operands.push_back(Chain); // Chain.
336 if (Glue)
337 Operands.push_back(Glue);
338}
339
340void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
341 bool IsStrided) {
342 SDLoc DL(Node);
343 unsigned NF = Node->getNumValues() - 1;
344 MVT VT = Node->getSimpleValueType(0);
345 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
346 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
347
348 unsigned CurOp = 2;
349 SmallVector<SDValue, 8> Operands;
350
351 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
352 Node->op_begin() + CurOp + NF);
353 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
354 Operands.push_back(Merge);
355 CurOp += NF;
356
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
358 Operands, /*IsLoad=*/true);
359
360 const RISCV::VLSEGPseudo *P =
361 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
362 static_cast<unsigned>(LMUL));
363 MachineSDNode *Load =
364 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
365
366 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
367 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
368
369 SDValue SuperReg = SDValue(Load, 0);
370 for (unsigned I = 0; I < NF; ++I) {
371 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
372 ReplaceUses(SDValue(Node, I),
373 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
374 }
375
376 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
377 CurDAG->RemoveDeadNode(Node);
378}
379
380void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
381 SDLoc DL(Node);
382 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
383 MVT VT = Node->getSimpleValueType(0);
384 MVT XLenVT = Subtarget->getXLenVT();
385 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
386 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
387
388 unsigned CurOp = 2;
389 SmallVector<SDValue, 8> Operands;
390
391 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
392 Node->op_begin() + CurOp + NF);
393 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
394 Operands.push_back(MaskedOff);
395 CurOp += NF;
396
397 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
398 /*IsStridedOrIndexed*/ false, Operands,
399 /*IsLoad=*/true);
400
401 const RISCV::VLSEGPseudo *P =
402 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
403 Log2SEW, static_cast<unsigned>(LMUL));
404 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
405 XLenVT, MVT::Other, Operands);
406
407 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
408 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
409
410 SDValue SuperReg = SDValue(Load, 0);
411 for (unsigned I = 0; I < NF; ++I) {
412 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
413 ReplaceUses(SDValue(Node, I),
414 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
415 }
416
417 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
418 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
419 CurDAG->RemoveDeadNode(Node);
420}
421
422void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
423 bool IsOrdered) {
424 SDLoc DL(Node);
425 unsigned NF = Node->getNumValues() - 1;
426 MVT VT = Node->getSimpleValueType(0);
427 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
428 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
429
430 unsigned CurOp = 2;
431 SmallVector<SDValue, 8> Operands;
432
433 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
434 Node->op_begin() + CurOp + NF);
435 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
436 Operands.push_back(MaskedOff);
437 CurOp += NF;
438
439 MVT IndexVT;
440 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
441 /*IsStridedOrIndexed*/ true, Operands,
442 /*IsLoad=*/true, &IndexVT);
443
445 "Element count mismatch");
446
447 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
448 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
449 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
450 report_fatal_error("The V extension does not support EEW=64 for index "
451 "values when XLEN=32");
452 }
453 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
454 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
455 static_cast<unsigned>(IndexLMUL));
456 MachineSDNode *Load =
457 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
458
459 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
460 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
461
462 SDValue SuperReg = SDValue(Load, 0);
463 for (unsigned I = 0; I < NF; ++I) {
464 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
465 ReplaceUses(SDValue(Node, I),
466 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
467 }
468
469 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
470 CurDAG->RemoveDeadNode(Node);
471}
472
473void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
474 bool IsStrided) {
475 SDLoc DL(Node);
476 unsigned NF = Node->getNumOperands() - 4;
477 if (IsStrided)
478 NF--;
479 if (IsMasked)
480 NF--;
481 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
482 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
483 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
484 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
485 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
486
487 SmallVector<SDValue, 8> Operands;
488 Operands.push_back(StoreVal);
489 unsigned CurOp = 2 + NF;
490
491 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
492 Operands);
493
494 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
495 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
496 MachineSDNode *Store =
497 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
498
499 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
500 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
501
502 ReplaceNode(Node, Store);
503}
504
505void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
506 bool IsOrdered) {
507 SDLoc DL(Node);
508 unsigned NF = Node->getNumOperands() - 5;
509 if (IsMasked)
510 --NF;
511 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
512 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
513 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
514 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
515 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
516
517 SmallVector<SDValue, 8> Operands;
518 Operands.push_back(StoreVal);
519 unsigned CurOp = 2 + NF;
520
521 MVT IndexVT;
522 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
523 /*IsStridedOrIndexed*/ true, Operands,
524 /*IsLoad=*/false, &IndexVT);
525
527 "Element count mismatch");
528
529 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
530 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
531 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
532 report_fatal_error("The V extension does not support EEW=64 for index "
533 "values when XLEN=32");
534 }
535 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
536 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
537 static_cast<unsigned>(IndexLMUL));
538 MachineSDNode *Store =
539 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
540
541 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
542 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
543
544 ReplaceNode(Node, Store);
545}
546
547 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
548 if (!Subtarget->hasVInstructions())
549 return;
550
551 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
552
553 SDLoc DL(Node);
554 MVT XLenVT = Subtarget->getXLenVT();
555
556 unsigned IntNo = Node->getConstantOperandVal(0);
557
558 assert((IntNo == Intrinsic::riscv_vsetvli ||
559 IntNo == Intrinsic::riscv_vsetvlimax) &&
560 "Unexpected vsetvli intrinsic");
561
562 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
563 unsigned Offset = (VLMax ? 1 : 2);
564
565 assert(Node->getNumOperands() == Offset + 2 &&
566 "Unexpected number of operands");
567
568 unsigned SEW =
569 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
570 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
571 Node->getConstantOperandVal(Offset + 1) & 0x7);
572
573 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
574 /*MaskAgnostic*/ true);
575 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
576
577 SDValue VLOperand;
578 unsigned Opcode = RISCV::PseudoVSETVLI;
579 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
580 if (auto VLEN = Subtarget->getRealVLen())
581 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
582 VLMax = true;
583 }
584 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
585 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
586 Opcode = RISCV::PseudoVSETVLIX0;
587 } else {
588 VLOperand = Node->getOperand(1);
589
590 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
591 uint64_t AVL = C->getZExtValue();
592 if (isUInt<5>(AVL)) {
593 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
594 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
595 XLenVT, VLImm, VTypeIOp));
596 return;
597 }
598 }
599 }
600
601 ReplaceNode(Node,
602 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
603}
604
605 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
606 MVT VT = Node->getSimpleValueType(0);
607 unsigned Opcode = Node->getOpcode();
608 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
609 "Unexpected opcode");
610 SDLoc DL(Node);
611
612 // For operations of the form (x << C1) op C2, check if we can use
613 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
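// For example, (x << 8) | 0x4800 cannot use ORI directly (0x4800 > 2047),
// but rewritten as ((x | 0x48) << 8) the OR fits a 12-bit immediate.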
614 SDValue N0 = Node->getOperand(0);
615 SDValue N1 = Node->getOperand(1);
616
617 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
618 if (!Cst)
619 return false;
620
621 int64_t Val = Cst->getSExtValue();
622
623 // Check if immediate can already use ANDI/ORI/XORI.
624 if (isInt<12>(Val))
625 return false;
626
627 SDValue Shift = N0;
628
629 // If Val is simm32 and we have a sext_inreg from i32, then the binop
630 // produces at least 33 sign bits. We can peek through the sext_inreg and use
631 // a SLLIW at the end.
632 bool SignExt = false;
633 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
634 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
635 SignExt = true;
636 Shift = N0.getOperand(0);
637 }
638
639 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
640 return false;
641
642 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
643 if (!ShlCst)
644 return false;
645
646 uint64_t ShAmt = ShlCst->getZExtValue();
647
648 // Make sure that we don't change the operation by removing bits.
649 // This only matters for OR and XOR; AND is unaffected.
650 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
651 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
652 return false;
653
654 int64_t ShiftedVal = Val >> ShAmt;
655 if (!isInt<12>(ShiftedVal))
656 return false;
657
658 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
659 if (SignExt && ShAmt >= 32)
660 return false;
661
662 // Ok, we can reorder to get a smaller immediate.
663 unsigned BinOpc;
664 switch (Opcode) {
665 default: llvm_unreachable("Unexpected opcode");
666 case ISD::AND: BinOpc = RISCV::ANDI; break;
667 case ISD::OR: BinOpc = RISCV::ORI; break;
668 case ISD::XOR: BinOpc = RISCV::XORI; break;
669 }
670
671 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
672
673 SDNode *BinOp =
674 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
675 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
676 SDNode *SLLI =
677 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
678 CurDAG->getTargetConstant(ShAmt, DL, VT));
679 ReplaceNode(Node, SLLI);
680 return true;
681}
682
683 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
684 // Only supported with XTHeadBb at the moment.
685 if (!Subtarget->hasVendorXTHeadBb())
686 return false;
687
688 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
689 if (!N1C)
690 return false;
691
692 SDValue N0 = Node->getOperand(0);
693 if (!N0.hasOneUse())
694 return false;
695
696 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
697 MVT VT) {
698 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
699 CurDAG->getTargetConstant(Msb, DL, VT),
700 CurDAG->getTargetConstant(Lsb, DL, VT));
701 };
702
703 SDLoc DL(Node);
704 MVT VT = Node->getSimpleValueType(0);
705 const unsigned RightShAmt = N1C->getZExtValue();
706
707 // Transform (sra (shl X, C1) C2) with C1 < C2
708 // -> (TH.EXT X, msb, lsb)
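// For example, with XLEN=64, (sra (shl X, 40), 48) is a sign-extended
// extraction of bits [23:8] of X: TH.EXT X, 23, 8 (msb = 64-40-1, lsb = 48-40).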
709 if (N0.getOpcode() == ISD::SHL) {
710 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
711 if (!N01C)
712 return false;
713
714 const unsigned LeftShAmt = N01C->getZExtValue();
715 // Make sure that this is a bitfield extraction (i.e., the shift-right
716 // amount cannot be less than the left-shift amount).
717 if (LeftShAmt > RightShAmt)
718 return false;
719
720 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
721 const unsigned Msb = MsbPlusOne - 1;
722 const unsigned Lsb = RightShAmt - LeftShAmt;
723
724 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
725 ReplaceNode(Node, TH_EXT);
726 return true;
727 }
728
729 // Transform (sra (sext_inreg X, _), C) ->
730 // (TH.EXT X, msb, lsb)
731 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
732 unsigned ExtSize =
733 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
734
735 // ExtSize of 32 should use sraiw via tablegen pattern.
736 if (ExtSize == 32)
737 return false;
738
739 const unsigned Msb = ExtSize - 1;
740 const unsigned Lsb = RightShAmt;
741
742 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
743 ReplaceNode(Node, TH_EXT);
744 return true;
745 }
746
747 return false;
748}
749
750 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
751 // Target does not support indexed loads.
752 if (!Subtarget->hasVendorXTHeadMemIdx())
753 return false;
754
755 LoadSDNode *Ld = cast<LoadSDNode>(Node);
756 ISD::MemIndexedMode AM = Ld->getAddressingMode();
757 if (AM == ISD::UNINDEXED)
758 return false;
759
760 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
761 if (!C)
762 return false;
763
764 EVT LoadVT = Ld->getMemoryVT();
765 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
766 "Unexpected addressing mode");
767 bool IsPre = AM == ISD::PRE_INC;
768 bool IsPost = AM == ISD::POST_INC;
769 int64_t Offset = C->getSExtValue();
770
771 // The constants that can be encoded in the THeadMemIdx instructions
772 // are of the form (sign_extend(imm5) << imm2).
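// For example, an offset of 48 is encodable as 12 << 2 (imm5 = 12, imm2 = 2),
// whereas an offset of 17 is not: for every shift 0-3 the quotient either does
// not fit simm5 or low bits would be lost.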
773 int64_t Shift;
774 for (Shift = 0; Shift < 4; Shift++)
775 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
776 break;
777
778 // Constant cannot be encoded.
779 if (Shift == 4)
780 return false;
781
782 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
783 unsigned Opcode;
784 if (LoadVT == MVT::i8 && IsPre)
785 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
786 else if (LoadVT == MVT::i8 && IsPost)
787 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
788 else if (LoadVT == MVT::i16 && IsPre)
789 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
790 else if (LoadVT == MVT::i16 && IsPost)
791 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
792 else if (LoadVT == MVT::i32 && IsPre)
793 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
794 else if (LoadVT == MVT::i32 && IsPost)
795 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
796 else if (LoadVT == MVT::i64 && IsPre)
797 Opcode = RISCV::TH_LDIB;
798 else if (LoadVT == MVT::i64 && IsPost)
799 Opcode = RISCV::TH_LDIA;
800 else
801 return false;
802
803 EVT Ty = Ld->getOffset().getValueType();
804 SDValue Ops[] = {Ld->getBasePtr(),
805 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
806 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
807 Ld->getChain()};
808 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
809 Ld->getValueType(1), MVT::Other, Ops);
810
811 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
812 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
813
814 ReplaceNode(Node, New);
815
816 return true;
817}
818
819 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
820 if (!Subtarget->hasVInstructions())
821 return;
822
823 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
824
825 SDLoc DL(Node);
826 unsigned IntNo = Node->getConstantOperandVal(1);
827
828 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
829 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
830 "Unexpected vsetvli intrinsic");
831
832 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
833 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
834 SDValue SEWOp =
835 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
836 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
837 Node->getOperand(4), Node->getOperand(5),
838 Node->getOperand(8), SEWOp,
839 Node->getOperand(0)};
840
841 unsigned Opcode;
842 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
843 switch (LMulSDNode->getSExtValue()) {
844 case 5:
845 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
846 : RISCV::PseudoVC_I_SE_MF8;
847 break;
848 case 6:
849 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
850 : RISCV::PseudoVC_I_SE_MF4;
851 break;
852 case 7:
853 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
854 : RISCV::PseudoVC_I_SE_MF2;
855 break;
856 case 0:
857 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
858 : RISCV::PseudoVC_I_SE_M1;
859 break;
860 case 1:
861 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
862 : RISCV::PseudoVC_I_SE_M2;
863 break;
864 case 2:
865 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
866 : RISCV::PseudoVC_I_SE_M4;
867 break;
868 case 3:
869 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
870 : RISCV::PseudoVC_I_SE_M8;
871 break;
872 }
873
874 ReplaceNode(Node, CurDAG->getMachineNode(
875 Opcode, DL, Node->getSimpleValueType(0), Operands));
876}
877
878 void RISCVDAGToDAGISel::Select(SDNode *Node) {
879 // If we have a custom node, we have already selected.
880 if (Node->isMachineOpcode()) {
881 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
882 Node->setNodeId(-1);
883 return;
884 }
885
886 // Instruction Selection not handled by the auto-generated tablegen selection
887 // should be handled here.
888 unsigned Opcode = Node->getOpcode();
889 MVT XLenVT = Subtarget->getXLenVT();
890 SDLoc DL(Node);
891 MVT VT = Node->getSimpleValueType(0);
892
893 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
894
895 switch (Opcode) {
896 case ISD::Constant: {
897 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
898 auto *ConstNode = cast<ConstantSDNode>(Node);
899 if (ConstNode->isZero()) {
900 SDValue New =
901 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
902 ReplaceNode(Node, New.getNode());
903 return;
904 }
905 int64_t Imm = ConstNode->getSExtValue();
906 // If only the lower 8 bits are used, try to convert this to a simm6 by
907 // sign-extending bit 7. This is neutral without the C extension, and
908 // allows C.LI to be used if C is present.
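// For example, Imm = 0xF0 whose users only read the low byte becomes -16,
// which fits the 6-bit immediate of C.LI.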
909 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
910 Imm = SignExtend64<8>(Imm);
911 // If the upper XLen-16 bits are not used, try to convert this to a simm12
912 // by sign extending bit 15.
913 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
914 hasAllHUsers(Node))
915 Imm = SignExtend64<16>(Imm);
916 // If the upper 32 bits are not used, try to convert this into a simm32 by
917 // sign-extending bit 31.
918 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
919 Imm = SignExtend64<32>(Imm);
920
921 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
922 return;
923 }
924 case ISD::ConstantFP: {
925 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
926 auto [FPImm, NeedsFNeg] =
927 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
928 VT);
929 if (FPImm >= 0) {
930 unsigned Opc;
931 unsigned FNegOpc;
932 switch (VT.SimpleTy) {
933 default:
934 llvm_unreachable("Unexpected size");
935 case MVT::f16:
936 Opc = RISCV::FLI_H;
937 FNegOpc = RISCV::FSGNJN_H;
938 break;
939 case MVT::f32:
940 Opc = RISCV::FLI_S;
941 FNegOpc = RISCV::FSGNJN_S;
942 break;
943 case MVT::f64:
944 Opc = RISCV::FLI_D;
945 FNegOpc = RISCV::FSGNJN_D;
946 break;
947 }
948 SDNode *Res = CurDAG->getMachineNode(
949 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
950 if (NeedsFNeg)
951 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
952 SDValue(Res, 0));
953
954 ReplaceNode(Node, Res);
955 return;
956 }
957
958 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
959 SDValue Imm;
960 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
961 // create an integer immediate.
962 if (APF.isPosZero() || NegZeroF64)
963 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
964 else
965 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
966 *Subtarget);
967
968 bool HasZdinx = Subtarget->hasStdExtZdinx();
969 bool Is64Bit = Subtarget->is64Bit();
970 unsigned Opc;
971 switch (VT.SimpleTy) {
972 default:
973 llvm_unreachable("Unexpected size");
974 case MVT::bf16:
975 assert(Subtarget->hasStdExtZfbfmin());
976 Opc = RISCV::FMV_H_X;
977 break;
978 case MVT::f16:
979 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
980 break;
981 case MVT::f32:
982 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
983 break;
984 case MVT::f64:
985 // For RV32, we can't move from a GPR, we need to convert instead. This
986 // should only happen for +0.0 and -0.0.
987 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
988 if (Is64Bit)
989 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
990 else
991 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
992 break;
993 }
994
995 SDNode *Res;
996 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
997 Res = CurDAG->getMachineNode(
998 Opc, DL, VT, Imm,
999 CurDAG->getTargetConstant(RISCV::FRM_RNE, DL, XLenVT));
1000 else
1001 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1002
1003 // For f64 -0.0, we need to insert a fneg.d idiom.
1004 if (NegZeroF64) {
1005 Opc = RISCV::FSGNJN_D;
1006 if (HasZdinx)
1007 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1008 Res =
1009 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1010 }
1011
1012 ReplaceNode(Node, Res);
1013 return;
1014 }
1015 case RISCVISD::BuildPairF64: {
1016 if (!Subtarget->hasStdExtZdinx())
1017 break;
1018
1019 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1020
1021 SDValue Ops[] = {
1022 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1023 Node->getOperand(0),
1024 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1025 Node->getOperand(1),
1026 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1027
1028 SDNode *N =
1029 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1030 ReplaceNode(Node, N);
1031 return;
1032 }
1033 case RISCVISD::SplitF64: {
1034 if (Subtarget->hasStdExtZdinx()) {
1035 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1036
1037 if (!SDValue(Node, 0).use_empty()) {
1038 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1039 Node->getOperand(0));
1040 ReplaceUses(SDValue(Node, 0), Lo);
1041 }
1042
1043 if (!SDValue(Node, 1).use_empty()) {
1044 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1045 Node->getOperand(0));
1046 ReplaceUses(SDValue(Node, 1), Hi);
1047 }
1048
1049 CurDAG->RemoveDeadNode(Node);
1050 return;
1051 }
1052
1053 if (!Subtarget->hasStdExtZfa())
1054 break;
1055 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1056 "Unexpected subtarget");
1057
1058 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1059 if (!SDValue(Node, 0).use_empty()) {
1060 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1061 Node->getOperand(0));
1062 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1063 }
1064 if (!SDValue(Node, 1).use_empty()) {
1065 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1066 Node->getOperand(0));
1067 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1068 }
1069
1070 CurDAG->RemoveDeadNode(Node);
1071 return;
1072 }
1073 case ISD::SHL: {
1074 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1075 if (!N1C)
1076 break;
1077 SDValue N0 = Node->getOperand(0);
1078 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1079 !isa<ConstantSDNode>(N0.getOperand(1)))
1080 break;
1081 unsigned ShAmt = N1C->getZExtValue();
1082 uint64_t Mask = N0.getConstantOperandVal(1);
1083
1084 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1085 // 32 leading zeros and C3 trailing zeros.
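// For example, ((X & 0xFFFFFFF0) << 2) becomes (slli (srliw X, 4), 6),
// avoiding materialization of the 0xFFFFFFF0 mask.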
1086 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1087 unsigned XLen = Subtarget->getXLen();
1088 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1089 unsigned TrailingZeros = llvm::countr_zero(Mask);
1090 if (TrailingZeros > 0 && LeadingZeros == 32) {
1091 SDNode *SRLIW = CurDAG->getMachineNode(
1092 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1093 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1094 SDNode *SLLI = CurDAG->getMachineNode(
1095 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1096 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1097 ReplaceNode(Node, SLLI);
1098 return;
1099 }
1100 }
1101 break;
1102 }
1103 case ISD::SRL: {
1104 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1105 if (!N1C)
1106 break;
1107 SDValue N0 = Node->getOperand(0);
1108 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1109 break;
1110 unsigned ShAmt = N1C->getZExtValue();
1111 uint64_t Mask = N0.getConstantOperandVal(1);
1112
1113 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1114 // 32 leading zeros and C3 trailing zeros.
1115 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1116 unsigned XLen = Subtarget->getXLen();
1117 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1118 unsigned TrailingZeros = llvm::countr_zero(Mask);
1119 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1120 SDNode *SRLIW = CurDAG->getMachineNode(
1121 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1122 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1123 SDNode *SLLI = CurDAG->getMachineNode(
1124 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1125 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1126 ReplaceNode(Node, SLLI);
1127 return;
1128 }
1129 }
1130
1131 // Optimize (srl (and X, C2), C) ->
1132 // (srli (slli X, XLen-C3), (XLen-C3) + C)
1133 // where C2 is a mask with C3 trailing ones, taking into account that C2
1134 // may have had lower bits unset by SimplifyDemandedBits. This avoids
1135 // materializing the C2 immediate.
1136 // This pattern occurs when type legalizing right shifts for types with
1137 // less than XLen bits.
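// For example, with XLen=64, (srl (and X, 0xFFFF), 4) becomes
// (srli (slli X, 48), 52).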
1138 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1139 if (!isMask_64(Mask))
1140 break;
1141 unsigned TrailingOnes = llvm::countr_one(Mask);
1142 if (ShAmt >= TrailingOnes)
1143 break;
1144 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1145 if (TrailingOnes == 32) {
1146 SDNode *SRLI = CurDAG->getMachineNode(
1147 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1148 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1149 ReplaceNode(Node, SRLI);
1150 return;
1151 }
1152
1153 // Only do the remaining transforms if the AND has one use.
1154 if (!N0.hasOneUse())
1155 break;
1156
1157 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1158 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1159 SDNode *BEXTI = CurDAG->getMachineNode(
1160 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1161 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1162 ReplaceNode(Node, BEXTI);
1163 return;
1164 }
1165
1166 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1167 SDNode *SLLI =
1168 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1169 CurDAG->getTargetConstant(LShAmt, DL, VT));
1170 SDNode *SRLI = CurDAG->getMachineNode(
1171 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1172 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1173 ReplaceNode(Node, SRLI);
1174 return;
1175 }
1176 case ISD::SRA: {
1177 if (trySignedBitfieldExtract(Node))
1178 return;
1179
1180 // Optimize (sra (sext_inreg X, i16), C) ->
1181 // (srai (slli X, XLen-16), (XLen-16) + C)
1182 // And (sra (sext_inreg X, i8), C) ->
1183 // (srai (slli X, XLen-8), (XLen-8) + C)
1184 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1185 // This transform matches the code we get without Zbb. The shifts are more
1186 // compressible, and this can help expose CSE opportunities in the sdiv by
1187 // constant optimization.
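// For example, on RV64, (sra (sext_inreg X, i8), 2) becomes
// (srai (slli X, 56), 58).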
1188 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1189 if (!N1C)
1190 break;
1191 SDValue N0 = Node->getOperand(0);
1192 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1193 break;
1194 unsigned ShAmt = N1C->getZExtValue();
1195 unsigned ExtSize =
1196 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1197 // ExtSize of 32 should use sraiw via tablegen pattern.
1198 if (ExtSize >= 32 || ShAmt >= ExtSize)
1199 break;
1200 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1201 SDNode *SLLI =
1202 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1203 CurDAG->getTargetConstant(LShAmt, DL, VT));
1204 SDNode *SRAI = CurDAG->getMachineNode(
1205 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1206 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1207 ReplaceNode(Node, SRAI);
1208 return;
1209 }
1210 case ISD::OR:
1211 case ISD::XOR:
1212 if (tryShrinkShlLogicImm(Node))
1213 return;
1214
1215 break;
1216 case ISD::AND: {
1217 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1218 if (!N1C)
1219 break;
1220
1221 SDValue N0 = Node->getOperand(0);
1222
1223 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1224 SDValue X, unsigned Msb,
1225 unsigned Lsb) {
1226 if (!Subtarget->hasVendorXTHeadBb())
1227 return false;
1228
1229 SDNode *TH_EXTU = CurDAG->getMachineNode(
1230 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1231 CurDAG->getTargetConstant(Lsb, DL, VT));
1232 ReplaceNode(Node, TH_EXTU);
1233 return true;
1234 };
1235
1236 bool LeftShift = N0.getOpcode() == ISD::SHL;
1237 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1238 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1239 if (!C)
1240 break;
1241 unsigned C2 = C->getZExtValue();
1242 unsigned XLen = Subtarget->getXLen();
1243 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1244
1245 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1246 // shift pair might offer more compression opportunities.
1247 // TODO: We could check for C extension here, but we don't have many lit
1248 // tests with the C extension enabled so not checking gets better
1249 // coverage.
1250 // TODO: What if ANDI is faster than the shift?
1251 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1252
1253 uint64_t C1 = N1C->getZExtValue();
1254
1255 // Clear irrelevant bits in the mask.
1256 if (LeftShift)
1257 C1 &= maskTrailingZeros<uint64_t>(C2);
1258 else
1259 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1260
1261 // Some transforms should only be done if the shift has a single use or
1262 // the AND would become (srli (slli X, 32), 32)
1263 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1264
1265 SDValue X = N0.getOperand(0);
1266
1267 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1268 // with c3 leading zeros.
1269 if (!LeftShift && isMask_64(C1)) {
1270 unsigned Leading = XLen - llvm::bit_width(C1);
1271 if (C2 < Leading) {
1272 // If the number of leading zeros is C2+32 this can be SRLIW.
1273 if (C2 + 32 == Leading) {
1274 SDNode *SRLIW = CurDAG->getMachineNode(
1275 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1276 ReplaceNode(Node, SRLIW);
1277 return;
1278 }
1279
1280 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1281 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1282 //
1283 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1284 // legalized and goes through DAG combine.
1285 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1288 SDNode *SRAIW =
1289 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1290 CurDAG->getTargetConstant(31, DL, VT));
1291 SDNode *SRLIW = CurDAG->getMachineNode(
1292 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1293 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1294 ReplaceNode(Node, SRLIW);
1295 return;
1296 }
1297
1298 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1299 // available.
1300 // Transform (and (srl x, C2), C1)
1301 // -> (<bfextract> x, msb, lsb)
1302 //
1303 // Make sure to keep this below the SRLIW cases, as we always want to
1304 // prefer the more common instruction.
1305 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1306 const unsigned Lsb = C2;
1307 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1308 return;
1309
1310 // (srli (slli x, c3-c2), c3).
1311 // Skip if we could use (zext.w (sraiw X, C2)).
1312 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1313 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1314 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1315 // Also Skip if we can use bexti or th.tst.
1316 Skip |= HasBitTest && Leading == XLen - 1;
1317 if (OneUseOrZExtW && !Skip) {
1318 SDNode *SLLI = CurDAG->getMachineNode(
1319 RISCV::SLLI, DL, VT, X,
1320 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1321 SDNode *SRLI = CurDAG->getMachineNode(
1322 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1323 CurDAG->getTargetConstant(Leading, DL, VT));
1324 ReplaceNode(Node, SRLI);
1325 return;
1326 }
1327 }
1328 }
1329
1330 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1331 // shifted by c2 bits with c3 leading zeros.
1332 if (LeftShift && isShiftedMask_64(C1)) {
1333 unsigned Leading = XLen - llvm::bit_width(C1);
1334
1335 if (C2 + Leading < XLen &&
1336 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1337 // Use slli.uw when possible.
1338 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1339 SDNode *SLLI_UW =
1340 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1341 CurDAG->getTargetConstant(C2, DL, VT));
1342 ReplaceNode(Node, SLLI_UW);
1343 return;
1344 }
1345
1346 // (srli (slli x, c2+c3), c3)
1347 if (OneUseOrZExtW && !IsCANDI) {
1348 SDNode *SLLI = CurDAG->getMachineNode(
1349 RISCV::SLLI, DL, VT, X,
1350 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1351 SDNode *SRLI = CurDAG->getMachineNode(
1352 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1353 CurDAG->getTargetConstant(Leading, DL, VT));
1354 ReplaceNode(Node, SRLI);
1355 return;
1356 }
1357 }
1358 }
1359
1360 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1361 // shifted mask with c2 leading zeros and c3 trailing zeros.
1362 if (!LeftShift && isShiftedMask_64(C1)) {
1363 unsigned Leading = XLen - llvm::bit_width(C1);
1364 unsigned Trailing = llvm::countr_zero(C1);
1365 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1366 !IsCANDI) {
1367 unsigned SrliOpc = RISCV::SRLI;
1368 // If the input is zexti32 we should use SRLIW.
1369 if (X.getOpcode() == ISD::AND &&
1370 isa<ConstantSDNode>(X.getOperand(1)) &&
1371 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1372 SrliOpc = RISCV::SRLIW;
1373 X = X.getOperand(0);
1374 }
1375 SDNode *SRLI = CurDAG->getMachineNode(
1376 SrliOpc, DL, VT, X,
1377 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1378 SDNode *SLLI = CurDAG->getMachineNode(
1379 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1380 CurDAG->getTargetConstant(Trailing, DL, VT));
1381 ReplaceNode(Node, SLLI);
1382 return;
1383 }
1384 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1385 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1386 OneUseOrZExtW && !IsCANDI) {
1387 SDNode *SRLIW = CurDAG->getMachineNode(
1388 RISCV::SRLIW, DL, VT, X,
1389 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1390 SDNode *SLLI = CurDAG->getMachineNode(
1391 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1392 CurDAG->getTargetConstant(Trailing, DL, VT));
1393 ReplaceNode(Node, SLLI);
1394 return;
1395 }
1396 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1397 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1398 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1399 SDNode *SRLI = CurDAG->getMachineNode(
1400 RISCV::SRLI, DL, VT, X,
1401 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1402 SDNode *SLLI_UW = CurDAG->getMachineNode(
1403 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1404 CurDAG->getTargetConstant(Trailing, DL, VT));
1405 ReplaceNode(Node, SLLI_UW);
1406 return;
1407 }
1408 }
1409
1410 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1411 // shifted mask with no leading zeros and c3 trailing zeros.
1412 if (LeftShift && isShiftedMask_64(C1)) {
1413 unsigned Leading = XLen - llvm::bit_width(C1);
1414 unsigned Trailing = llvm::countr_zero(C1);
1415 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1416 SDNode *SRLI = CurDAG->getMachineNode(
1417 RISCV::SRLI, DL, VT, X,
1418 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1419 SDNode *SLLI = CurDAG->getMachineNode(
1420 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1421 CurDAG->getTargetConstant(Trailing, DL, VT));
1422 ReplaceNode(Node, SLLI);
1423 return;
1424 }
1425 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1426 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1427 SDNode *SRLIW = CurDAG->getMachineNode(
1428 RISCV::SRLIW, DL, VT, X,
1429 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1430 SDNode *SLLI = CurDAG->getMachineNode(
1431 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1432 CurDAG->getTargetConstant(Trailing, DL, VT));
1433 ReplaceNode(Node, SLLI);
1434 return;
1435 }
1436
1437 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1438 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1439 Subtarget->hasStdExtZba()) {
1440 SDNode *SRLI = CurDAG->getMachineNode(
1441 RISCV::SRLI, DL, VT, X,
1442 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1443 SDNode *SLLI_UW = CurDAG->getMachineNode(
1444 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1445 CurDAG->getTargetConstant(Trailing, DL, VT));
1446 ReplaceNode(Node, SLLI_UW);
1447 return;
1448 }
1449 }
1450 }
1451
1452 const uint64_t C1 = N1C->getZExtValue();
1453
1454 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a mask
1455 // with c3 leading zeros and c2 is larger than c3.
1456 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1457 N0.hasOneUse()) {
1458 unsigned C2 = N0.getConstantOperandVal(1);
1459 unsigned XLen = Subtarget->getXLen();
1460 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1461
1462 SDValue X = N0.getOperand(0);
1463
1464 // Prefer SRAIW + ANDI when possible.
1465 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1466 X.getOpcode() == ISD::SHL &&
1467 isa<ConstantSDNode>(X.getOperand(1)) &&
1468 X.getConstantOperandVal(1) == 32;
1469 if (isMask_64(C1) && !Skip) {
1470 unsigned Leading = XLen - llvm::bit_width(C1);
1471 if (C2 > Leading) {
1472 SDNode *SRAI = CurDAG->getMachineNode(
1473 RISCV::SRAI, DL, VT, X,
1474 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1475 SDNode *SRLI = CurDAG->getMachineNode(
1476 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1477 CurDAG->getTargetConstant(Leading, DL, VT));
1478 ReplaceNode(Node, SRLI);
1479 return;
1480 }
1481 }
1482 }
1483
1484 // If C1 masks off the upper bits only (but can't be formed as an
1485 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1486 // available.
1487 // Transform (and x, C1)
1488 // -> (<bfextract> x, msb, lsb)
1489 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1490 const unsigned Msb = llvm::bit_width(C1) - 1;
1491 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1492 return;
1493 }
1494
1495 if (tryShrinkShlLogicImm(Node))
1496 return;
1497
1498 break;
1499 }
1500 case ISD::MUL: {
1501 // Special case for calculating (mul (and X, C2), C1) where the full product
1502 // fits in XLen bits. We can shift X left by the number of leading zeros in
1503 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1504 // product has XLen trailing zeros, putting it in the output of MULHU. This
1505 // can avoid materializing a constant in a register for C2.
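// For example, on RV64, (mul (and X, 0xFFFFFF), 100) becomes
// (mulhu (slli X, 40), 100 << 24): the shifts sum to 64, so the untruncated
// product lands entirely in MULHU's upper half, and the 0xFFFFFF mask is
// subsumed by the left shift.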
1506
1507 // RHS should be a constant.
1508 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1509 if (!N1C || !N1C->hasOneUse())
1510 break;
1511
1512 // LHS should be an AND with constant.
1513 SDValue N0 = Node->getOperand(0);
1514 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1515 break;
1516
1517 uint64_t C2 = N0.getConstantOperandVal(1);
1518
1519 // Constant should be a mask.
1520 if (!isMask_64(C2))
1521 break;
1522
1523 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1524 // multiple users or the constant is a simm12. This prevents inserting a
1525 // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1526 // make it more costly to materialize. Otherwise, using a SLLI might allow
1527 // it to be compressed.
1528 bool IsANDIOrZExt =
1529 isInt<12>(C2) ||
1530 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1531 // With XTHeadBb, we can use TH.EXTU.
1532 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1533 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1534 break;
1535 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1536 // the constant is a simm32.
1537 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1538 // With XTHeadBb, we can use TH.EXTU.
1539 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1540 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1541 break;
1542
1543 // We need to shift left the AND input and C1 by a total of XLen bits.
1544
1545 // How far left do we need to shift the AND input?
1546 unsigned XLen = Subtarget->getXLen();
1547 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1548
1549 // The constant gets shifted by the remaining amount unless that would
1550 // shift bits out.
1551 uint64_t C1 = N1C->getZExtValue();
1552 unsigned ConstantShift = XLen - LeadingZeros;
1553 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1554 break;
1555
1556 uint64_t ShiftedC1 = C1 << ConstantShift;
1557 // If this is RV32, we need to sign-extend the constant.
1558 if (XLen == 32)
1559 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1560
1561 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1562 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1563 SDNode *SLLI =
1564 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1565 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1566 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1567 SDValue(SLLI, 0), SDValue(Imm, 0));
1568 ReplaceNode(Node, MULHU);
1569 return;
1570 }
1571 case ISD::LOAD: {
1572 if (tryIndexedLoad(Node))
1573 return;
1574
1575 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1576 // We match a post-incrementing load here.
1577 LoadSDNode *Load = cast<LoadSDNode>(Node);
1578 if (Load->getAddressingMode() != ISD::POST_INC)
1579 break;
1580
1581 SDValue Chain = Node->getOperand(0);
1582 SDValue Base = Node->getOperand(1);
1583 SDValue Offset = Node->getOperand(2);
1584
1585 bool Simm12 = false;
1586 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1587
1588 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1589 int ConstantVal = ConstantOffset->getSExtValue();
1590 Simm12 = isInt<12>(ConstantVal);
1591 if (Simm12)
1592 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1593 Offset.getValueType());
1594 }
1595
1596 unsigned Opcode = 0;
1597 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1598 case MVT::i8:
1599 if (Simm12 && SignExtend)
1600 Opcode = RISCV::CV_LB_ri_inc;
1601 else if (Simm12 && !SignExtend)
1602 Opcode = RISCV::CV_LBU_ri_inc;
1603 else if (!Simm12 && SignExtend)
1604 Opcode = RISCV::CV_LB_rr_inc;
1605 else
1606 Opcode = RISCV::CV_LBU_rr_inc;
1607 break;
1608 case MVT::i16:
1609 if (Simm12 && SignExtend)
1610 Opcode = RISCV::CV_LH_ri_inc;
1611 else if (Simm12 && !SignExtend)
1612 Opcode = RISCV::CV_LHU_ri_inc;
1613 else if (!Simm12 && SignExtend)
1614 Opcode = RISCV::CV_LH_rr_inc;
1615 else
1616 Opcode = RISCV::CV_LHU_rr_inc;
1617 break;
1618 case MVT::i32:
1619 if (Simm12)
1620 Opcode = RISCV::CV_LW_ri_inc;
1621 else
1622 Opcode = RISCV::CV_LW_rr_inc;
1623 break;
1624 default:
1625 break;
1626 }
1627 if (!Opcode)
1628 break;
1629
1630 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1631 Chain.getSimpleValueType(), Base,
1632 Offset, Chain));
1633 return;
1634 }
1635 break;
1636 }
1637 case ISD::INTRINSIC_WO_CHAIN: {
1638 unsigned IntNo = Node->getConstantOperandVal(0);
1639 switch (IntNo) {
1640 // By default we do not custom select any intrinsic.
1641 default:
1642 break;
1643 case Intrinsic::riscv_vmsgeu:
1644 case Intrinsic::riscv_vmsge: {
1645 SDValue Src1 = Node->getOperand(1);
1646 SDValue Src2 = Node->getOperand(2);
1647 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1648 bool IsCmpUnsignedZero = false;
1649 // Only custom select scalar second operand.
1650 if (Src2.getValueType() != XLenVT)
1651 break;
1652 // Small constants are handled with patterns.
1653 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1654 int64_t CVal = C->getSExtValue();
1655 if (CVal >= -15 && CVal <= 16) {
1656 if (!IsUnsigned || CVal != 0)
1657 break;
1658 IsCmpUnsignedZero = true;
1659 }
1660 }
1661 MVT Src1VT = Src1.getSimpleValueType();
1662 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1663 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1664 default:
1665 llvm_unreachable("Unexpected LMUL!");
1666#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1667 case RISCVII::VLMUL::lmulenum: \
1668 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1669 : RISCV::PseudoVMSLT_VX_##suffix; \
1670 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1671 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1672 break;
1673 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1674 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1675 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1676 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1677 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1678 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1679 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1680#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1681 }
1682 SDValue SEW = CurDAG->getTargetConstant(
1683 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1684 SDValue VL;
1685 selectVLOp(Node->getOperand(3), VL);
1686
1687 // If vmsgeu with 0 immediate, expand it to vmset.
1688 if (IsCmpUnsignedZero) {
1689 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1690 return;
1691 }
1692
1693 // Expand to
1694 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
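 // Since (va >= x) == !(va < x), the vmnand of the compare result with
 // itself simply inverts every bit of the mask produced by vmslt{u}.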
1695 SDValue Cmp = SDValue(
1696 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1697 0);
1698 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1699 {Cmp, Cmp, VL, SEW}));
1700 return;
1701 }
1702 case Intrinsic::riscv_vmsgeu_mask:
1703 case Intrinsic::riscv_vmsge_mask: {
1704 SDValue Src1 = Node->getOperand(2);
1705 SDValue Src2 = Node->getOperand(3);
1706 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1707 bool IsCmpUnsignedZero = false;
1708 // Only custom select scalar second operand.
1709 if (Src2.getValueType() != XLenVT)
1710 break;
1711 // Small constants are handled with patterns.
1712 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1713 int64_t CVal = C->getSExtValue();
1714 if (CVal >= -15 && CVal <= 16) {
1715 if (!IsUnsigned || CVal != 0)
1716 break;
1717 IsCmpUnsignedZero = true;
1718 }
1719 }
1720 MVT Src1VT = Src1.getSimpleValueType();
1721 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1722 VMOROpcode;
1723 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1724 default:
1725 llvm_unreachable("Unexpected LMUL!");
1726#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1727 case RISCVII::VLMUL::lmulenum: \
1728 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1729 : RISCV::PseudoVMSLT_VX_##suffix; \
1730 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1731 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1732 break;
1733 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1734 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1735 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1736 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1737 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1738 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1739 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1740#undef CASE_VMSLT_OPCODES
1741 }
1742 // Mask operations use the LMUL from the mask type.
1743 switch (RISCVTargetLowering::getLMUL(VT)) {
1744 default:
1745 llvm_unreachable("Unexpected LMUL!");
1746#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1747 case RISCVII::VLMUL::lmulenum: \
1748 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1749 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1750 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1751 break;
1752 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1753 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1754 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1755 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1756 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1757 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1758 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1759#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1760 }
1761 SDValue SEW = CurDAG->getTargetConstant(
1762 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1763 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1764 SDValue VL;
1765 selectVLOp(Node->getOperand(5), VL);
1766 SDValue MaskedOff = Node->getOperand(1);
1767 SDValue Mask = Node->getOperand(4);
1768
1769 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1770 if (IsCmpUnsignedZero) {
1771 // We don't need vmor if the MaskedOff and the Mask are the same
1772 // value.
1773 if (Mask == MaskedOff) {
1774 ReplaceUses(Node, Mask.getNode());
1775 return;
1776 }
1777 ReplaceNode(Node,
1778 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1779 {Mask, MaskedOff, VL, MaskSEW}));
1780 return;
1781 }
1782
1783 // If the MaskedOff value and the Mask are the same value use
1784 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1785 // This avoids needing to copy v0 to vd before starting the next sequence.
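 // With operands {Mask, Cmp}, vmandn computes Mask & ~Cmp: active elements
 // receive the inverted compare, i.e. (va >= x), while inactive elements
 // become 0, which equals MaskedOff because Mask == MaskedOff here.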
1786 if (Mask == MaskedOff) {
1787 SDValue Cmp = SDValue(
1788 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1789 0);
1790 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1791 {Mask, Cmp, VL, MaskSEW}));
1792 return;
1793 }
1794
1795 // Mask needs to be copied to V0.
1796 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1797 RISCV::V0, Mask, SDValue());
1798 SDValue Glue = Chain.getValue(1);
1799 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1800
1801 // Otherwise use
1802 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1803 // The result is mask undisturbed.
1804 // We use the same instructions to emulate mask agnostic behavior, because
1805 // the agnostic result can be either undisturbed or all 1.
1806 SDValue Cmp = SDValue(
1807 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1808 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1809 0);
1810 // vmxor.mm vd, vd, v0 is used to update the active elements.
1811 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1812 {Cmp, Mask, VL, MaskSEW}));
1813 return;
1814 }
1815 case Intrinsic::riscv_vsetvli:
1816 case Intrinsic::riscv_vsetvlimax:
1817 return selectVSETVLI(Node);
1818 }
1819 break;
1820 }
1821 case ISD::INTRINSIC_W_CHAIN: {
1822 unsigned IntNo = Node->getConstantOperandVal(1);
1823 switch (IntNo) {
1824 // By default we do not custom select any intrinsic.
1825 default:
1826 break;
1827 case Intrinsic::riscv_vlseg2:
1828 case Intrinsic::riscv_vlseg3:
1829 case Intrinsic::riscv_vlseg4:
1830 case Intrinsic::riscv_vlseg5:
1831 case Intrinsic::riscv_vlseg6:
1832 case Intrinsic::riscv_vlseg7:
1833 case Intrinsic::riscv_vlseg8: {
1834 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1835 return;
1836 }
1837 case Intrinsic::riscv_vlseg2_mask:
1838 case Intrinsic::riscv_vlseg3_mask:
1839 case Intrinsic::riscv_vlseg4_mask:
1840 case Intrinsic::riscv_vlseg5_mask:
1841 case Intrinsic::riscv_vlseg6_mask:
1842 case Intrinsic::riscv_vlseg7_mask:
1843 case Intrinsic::riscv_vlseg8_mask: {
1844 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1845 return;
1846 }
1847 case Intrinsic::riscv_vlsseg2:
1848 case Intrinsic::riscv_vlsseg3:
1849 case Intrinsic::riscv_vlsseg4:
1850 case Intrinsic::riscv_vlsseg5:
1851 case Intrinsic::riscv_vlsseg6:
1852 case Intrinsic::riscv_vlsseg7:
1853 case Intrinsic::riscv_vlsseg8: {
1854 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1855 return;
1856 }
1857 case Intrinsic::riscv_vlsseg2_mask:
1858 case Intrinsic::riscv_vlsseg3_mask:
1859 case Intrinsic::riscv_vlsseg4_mask:
1860 case Intrinsic::riscv_vlsseg5_mask:
1861 case Intrinsic::riscv_vlsseg6_mask:
1862 case Intrinsic::riscv_vlsseg7_mask:
1863 case Intrinsic::riscv_vlsseg8_mask: {
1864 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1865 return;
1866 }
1867 case Intrinsic::riscv_vloxseg2:
1868 case Intrinsic::riscv_vloxseg3:
1869 case Intrinsic::riscv_vloxseg4:
1870 case Intrinsic::riscv_vloxseg5:
1871 case Intrinsic::riscv_vloxseg6:
1872 case Intrinsic::riscv_vloxseg7:
1873 case Intrinsic::riscv_vloxseg8:
1874 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1875 return;
1876 case Intrinsic::riscv_vluxseg2:
1877 case Intrinsic::riscv_vluxseg3:
1878 case Intrinsic::riscv_vluxseg4:
1879 case Intrinsic::riscv_vluxseg5:
1880 case Intrinsic::riscv_vluxseg6:
1881 case Intrinsic::riscv_vluxseg7:
1882 case Intrinsic::riscv_vluxseg8:
1883 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1884 return;
1885 case Intrinsic::riscv_vloxseg2_mask:
1886 case Intrinsic::riscv_vloxseg3_mask:
1887 case Intrinsic::riscv_vloxseg4_mask:
1888 case Intrinsic::riscv_vloxseg5_mask:
1889 case Intrinsic::riscv_vloxseg6_mask:
1890 case Intrinsic::riscv_vloxseg7_mask:
1891 case Intrinsic::riscv_vloxseg8_mask:
1892 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1893 return;
1894 case Intrinsic::riscv_vluxseg2_mask:
1895 case Intrinsic::riscv_vluxseg3_mask:
1896 case Intrinsic::riscv_vluxseg4_mask:
1897 case Intrinsic::riscv_vluxseg5_mask:
1898 case Intrinsic::riscv_vluxseg6_mask:
1899 case Intrinsic::riscv_vluxseg7_mask:
1900 case Intrinsic::riscv_vluxseg8_mask:
1901 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1902 return;
1903 case Intrinsic::riscv_vlseg8ff:
1904 case Intrinsic::riscv_vlseg7ff:
1905 case Intrinsic::riscv_vlseg6ff:
1906 case Intrinsic::riscv_vlseg5ff:
1907 case Intrinsic::riscv_vlseg4ff:
1908 case Intrinsic::riscv_vlseg3ff:
1909 case Intrinsic::riscv_vlseg2ff: {
1910 selectVLSEGFF(Node, /*IsMasked*/ false);
1911 return;
1912 }
1913 case Intrinsic::riscv_vlseg8ff_mask:
1914 case Intrinsic::riscv_vlseg7ff_mask:
1915 case Intrinsic::riscv_vlseg6ff_mask:
1916 case Intrinsic::riscv_vlseg5ff_mask:
1917 case Intrinsic::riscv_vlseg4ff_mask:
1918 case Intrinsic::riscv_vlseg3ff_mask:
1919 case Intrinsic::riscv_vlseg2ff_mask: {
1920 selectVLSEGFF(Node, /*IsMasked*/ true);
1921 return;
1922 }
1923 case Intrinsic::riscv_vloxei:
1924 case Intrinsic::riscv_vloxei_mask:
1925 case Intrinsic::riscv_vluxei:
1926 case Intrinsic::riscv_vluxei_mask: {
1927 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1928 IntNo == Intrinsic::riscv_vluxei_mask;
1929 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1930 IntNo == Intrinsic::riscv_vloxei_mask;
1931
1932 MVT VT = Node->getSimpleValueType(0);
1933 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1934
1935 unsigned CurOp = 2;
1936 SmallVector<SDValue, 8> Operands;
1937 Operands.push_back(Node->getOperand(CurOp++));
1938
1939 MVT IndexVT;
1940 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1941 /*IsStridedOrIndexed*/ true, Operands,
1942 /*IsLoad=*/true, &IndexVT);
1943
1944 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1945 "Element count mismatch");
1946
1947 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1948 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1949 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1950 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1951 report_fatal_error("The V extension does not support EEW=64 for index "
1952 "values when XLEN=32");
1953 }
1954 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1955 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1956 static_cast<unsigned>(IndexLMUL));
1957 MachineSDNode *Load =
1958 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1959
1960 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1961 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1962
1963 ReplaceNode(Node, Load);
1964 return;
1965 }
1966 case Intrinsic::riscv_vlm:
1967 case Intrinsic::riscv_vle:
1968 case Intrinsic::riscv_vle_mask:
1969 case Intrinsic::riscv_vlse:
1970 case Intrinsic::riscv_vlse_mask: {
1971 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1972 IntNo == Intrinsic::riscv_vlse_mask;
1973 bool IsStrided =
1974 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1975
1976 MVT VT = Node->getSimpleValueType(0);
1977 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1978
1979 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1980 // operand at the IR level. In pseudos, it has both a policy and a
1981 // passthru operand. The passthru operand is needed to track the
1982 // "tail undefined" state, and the policy is there just for
1983 // consistency - it will always be "don't care" for the
1984 // unmasked form.
1985 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1986 unsigned CurOp = 2;
1987 SmallVector<SDValue, 8> Operands;
1988 if (HasPassthruOperand)
1989 Operands.push_back(Node->getOperand(CurOp++));
1990 else {
1991 // We eagerly lower to implicit_def (instead of undef), as we
1992 // otherwise fail to select nodes such as: nxv1i1 = undef
1993 SDNode *Passthru =
1994 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1995 Operands.push_back(SDValue(Passthru, 0));
1996 }
1997 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1998 Operands, /*IsLoad=*/true);
1999
2000 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2001 const RISCV::VLEPseudo *P =
2002 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2003 static_cast<unsigned>(LMUL));
2004 MachineSDNode *Load =
2005 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2006
2007 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2008 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2009
2010 ReplaceNode(Node, Load);
2011 return;
2012 }
2013 case Intrinsic::riscv_vleff:
2014 case Intrinsic::riscv_vleff_mask: {
2015 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2016
2017 MVT VT = Node->getSimpleValueType(0);
2018 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2019
2020 unsigned CurOp = 2;
2021 SmallVector<SDValue, 8> Operands;
2022 Operands.push_back(Node->getOperand(CurOp++));
2023 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2024 /*IsStridedOrIndexed*/ false, Operands,
2025 /*IsLoad=*/true);
2026
2027 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2028 const RISCV::VLEPseudo *P =
2029 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2030 Log2SEW, static_cast<unsigned>(LMUL));
2031 MachineSDNode *Load = CurDAG->getMachineNode(
2032 P->Pseudo, DL, Node->getVTList(), Operands);
2033 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2034 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2035
2036 ReplaceNode(Node, Load);
2037 return;
2038 }
2039 }
2040 break;
2041 }
2042 case ISD::INTRINSIC_VOID: {
2043 unsigned IntNo = Node->getConstantOperandVal(1);
2044 switch (IntNo) {
2045 case Intrinsic::riscv_vsseg2:
2046 case Intrinsic::riscv_vsseg3:
2047 case Intrinsic::riscv_vsseg4:
2048 case Intrinsic::riscv_vsseg5:
2049 case Intrinsic::riscv_vsseg6:
2050 case Intrinsic::riscv_vsseg7:
2051 case Intrinsic::riscv_vsseg8: {
2052 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
2053 return;
2054 }
2055 case Intrinsic::riscv_vsseg2_mask:
2056 case Intrinsic::riscv_vsseg3_mask:
2057 case Intrinsic::riscv_vsseg4_mask:
2058 case Intrinsic::riscv_vsseg5_mask:
2059 case Intrinsic::riscv_vsseg6_mask:
2060 case Intrinsic::riscv_vsseg7_mask:
2061 case Intrinsic::riscv_vsseg8_mask: {
2062 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
2063 return;
2064 }
2065 case Intrinsic::riscv_vssseg2:
2066 case Intrinsic::riscv_vssseg3:
2067 case Intrinsic::riscv_vssseg4:
2068 case Intrinsic::riscv_vssseg5:
2069 case Intrinsic::riscv_vssseg6:
2070 case Intrinsic::riscv_vssseg7:
2071 case Intrinsic::riscv_vssseg8: {
2072 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
2073 return;
2074 }
2075 case Intrinsic::riscv_vssseg2_mask:
2076 case Intrinsic::riscv_vssseg3_mask:
2077 case Intrinsic::riscv_vssseg4_mask:
2078 case Intrinsic::riscv_vssseg5_mask:
2079 case Intrinsic::riscv_vssseg6_mask:
2080 case Intrinsic::riscv_vssseg7_mask:
2081 case Intrinsic::riscv_vssseg8_mask: {
2082 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
2083 return;
2084 }
2085 case Intrinsic::riscv_vsoxseg2:
2086 case Intrinsic::riscv_vsoxseg3:
2087 case Intrinsic::riscv_vsoxseg4:
2088 case Intrinsic::riscv_vsoxseg5:
2089 case Intrinsic::riscv_vsoxseg6:
2090 case Intrinsic::riscv_vsoxseg7:
2091 case Intrinsic::riscv_vsoxseg8:
2092 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
2093 return;
2094 case Intrinsic::riscv_vsuxseg2:
2095 case Intrinsic::riscv_vsuxseg3:
2096 case Intrinsic::riscv_vsuxseg4:
2097 case Intrinsic::riscv_vsuxseg5:
2098 case Intrinsic::riscv_vsuxseg6:
2099 case Intrinsic::riscv_vsuxseg7:
2100 case Intrinsic::riscv_vsuxseg8:
2101 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
2102 return;
2103 case Intrinsic::riscv_vsoxseg2_mask:
2104 case Intrinsic::riscv_vsoxseg3_mask:
2105 case Intrinsic::riscv_vsoxseg4_mask:
2106 case Intrinsic::riscv_vsoxseg5_mask:
2107 case Intrinsic::riscv_vsoxseg6_mask:
2108 case Intrinsic::riscv_vsoxseg7_mask:
2109 case Intrinsic::riscv_vsoxseg8_mask:
2110 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
2111 return;
2112 case Intrinsic::riscv_vsuxseg2_mask:
2113 case Intrinsic::riscv_vsuxseg3_mask:
2114 case Intrinsic::riscv_vsuxseg4_mask:
2115 case Intrinsic::riscv_vsuxseg5_mask:
2116 case Intrinsic::riscv_vsuxseg6_mask:
2117 case Intrinsic::riscv_vsuxseg7_mask:
2118 case Intrinsic::riscv_vsuxseg8_mask:
2119 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
2120 return;
2121 case Intrinsic::riscv_vsoxei:
2122 case Intrinsic::riscv_vsoxei_mask:
2123 case Intrinsic::riscv_vsuxei:
2124 case Intrinsic::riscv_vsuxei_mask: {
2125 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2126 IntNo == Intrinsic::riscv_vsuxei_mask;
2127 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2128 IntNo == Intrinsic::riscv_vsoxei_mask;
2129
2130 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2131 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2132
2133 unsigned CurOp = 2;
2134 SmallVector<SDValue, 8> Operands;
2135 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2136
2137 MVT IndexVT;
2138 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2139 /*IsStridedOrIndexed*/ true, Operands,
2140 /*IsLoad=*/false, &IndexVT);
2141
2142 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2143 "Element count mismatch");
2144
2145 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2146 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2147 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2148 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2149 report_fatal_error("The V extension does not support EEW=64 for index "
2150 "values when XLEN=32");
2151 }
2152 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2153 IsMasked, IsOrdered, IndexLog2EEW,
2154 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2155 MachineSDNode *Store =
2156 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2157
2158 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2159 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2160
2161 ReplaceNode(Node, Store);
2162 return;
2163 }
2164 case Intrinsic::riscv_vsm:
2165 case Intrinsic::riscv_vse:
2166 case Intrinsic::riscv_vse_mask:
2167 case Intrinsic::riscv_vsse:
2168 case Intrinsic::riscv_vsse_mask: {
2169 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2170 IntNo == Intrinsic::riscv_vsse_mask;
2171 bool IsStrided =
2172 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2173
2174 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2175 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2176
2177 unsigned CurOp = 2;
2178 SmallVector<SDValue, 8> Operands;
2179 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2180
2181 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2182 Operands);
2183
2184 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2185 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2186 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2187 MachineSDNode *Store =
2188 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2189 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2190 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2191
2192 ReplaceNode(Node, Store);
2193 return;
2194 }
2195 case Intrinsic::riscv_sf_vc_x_se:
2196 case Intrinsic::riscv_sf_vc_i_se:
2197 selectSF_VC_X_SE(Node);
2198 return;
2199 }
2200 break;
2201 }
2202 case ISD::BITCAST: {
2203 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2204 // Just drop bitcasts between vectors if both are fixed or both are
2205 // scalable.
2206 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2207 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2208 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2209 CurDAG->RemoveDeadNode(Node);
2210 return;
2211 }
2212 break;
2213 }
2214 case ISD::INSERT_SUBVECTOR: {
2215 SDValue V = Node->getOperand(0);
2216 SDValue SubV = Node->getOperand(1);
2217 SDLoc DL(SubV);
2218 auto Idx = Node->getConstantOperandVal(2);
2219 MVT SubVecVT = SubV.getSimpleValueType();
2220
2221 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2222 MVT SubVecContainerVT = SubVecVT;
2223 // Establish the correct scalable-vector types for any fixed-length type.
2224 if (SubVecVT.isFixedLengthVector()) {
2225 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2226 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2227 [[maybe_unused]] bool ExactlyVecRegSized =
2228 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2229 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2230 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2231 .getKnownMinValue()));
2232 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2233 }
2234 MVT ContainerVT = VT;
2235 if (VT.isFixedLengthVector())
2236 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2237
2238 const auto *TRI = Subtarget->getRegisterInfo();
2239 unsigned SubRegIdx;
2240 std::tie(SubRegIdx, Idx) =
2241 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2242 ContainerVT, SubVecContainerVT, Idx, TRI);
2243
2244 // If the Idx hasn't been completely eliminated then this is a subvector
2245 // insert which doesn't naturally align to a vector register. These must
2246 // be handled using instructions to manipulate the vector registers.
2247 if (Idx != 0)
2248 break;
2249
2250 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2251 [[maybe_unused]] bool IsSubVecPartReg =
2252 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2253 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2254 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2255 assert((!IsSubVecPartReg || V.isUndef()) &&
2256 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2257 "the subvector is smaller than a full-sized register");
2258
2259 // If we haven't set a SubRegIdx, then we must be going between
2260 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2261 if (SubRegIdx == RISCV::NoSubRegister) {
2262 unsigned InRegClassID =
2263 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2264 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2265 InRegClassID &&
2266 "Unexpected subvector extraction");
2267 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2268 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2269 DL, VT, SubV, RC);
2270 ReplaceNode(Node, NewNode);
2271 return;
2272 }
2273
2274 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2275 ReplaceNode(Node, Insert.getNode());
2276 return;
2277 }
2278 case ISD::EXTRACT_SUBVECTOR: {
2279 SDValue V = Node->getOperand(0);
2280 auto Idx = Node->getConstantOperandVal(1);
2281 MVT InVT = V.getSimpleValueType();
2282 SDLoc DL(V);
2283
2284 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2285 MVT SubVecContainerVT = VT;
2286 // Establish the correct scalable-vector types for any fixed-length type.
2287 if (VT.isFixedLengthVector()) {
2288 assert(Idx == 0);
2289 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2290 }
2291 if (InVT.isFixedLengthVector())
2292 InVT = TLI.getContainerForFixedLengthVector(InVT);
2293
2294 const auto *TRI = Subtarget->getRegisterInfo();
2295 unsigned SubRegIdx;
2296 std::tie(SubRegIdx, Idx) =
2297 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2298 InVT, SubVecContainerVT, Idx, TRI);
2299
2300 // If the Idx hasn't been completely eliminated then this is a subvector
2301 // extract which doesn't naturally align to a vector register. These must
2302 // be handled using instructions to manipulate the vector registers.
2303 if (Idx != 0)
2304 break;
2305
2306 // If we haven't set a SubRegIdx, then we must be going between
2307 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2308 if (SubRegIdx == RISCV::NoSubRegister) {
2309 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2310 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2311 InRegClassID &&
2312 "Unexpected subvector extraction");
2313 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2314 SDNode *NewNode =
2315 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2316 ReplaceNode(Node, NewNode);
2317 return;
2318 }
2319
2320 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2321 ReplaceNode(Node, Extract.getNode());
2322 return;
2323 }
2324 case RISCVISD::VMV_S_X_VL:
2325 case RISCVISD::VFMV_S_F_VL:
2326 case RISCVISD::VMV_V_X_VL:
2327 case RISCVISD::VFMV_V_F_VL: {
2328 // Try to match splat of a scalar load to a strided load with stride of x0.
2329 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2330 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2331 if (!Node->getOperand(0).isUndef())
2332 break;
2333 SDValue Src = Node->getOperand(1);
2334 auto *Ld = dyn_cast<LoadSDNode>(Src);
2335 // Can't fold an indexed (load update) node: its second output, the
2336 // updated address, has a use, so the load node could not be removed.
2337 if (!Ld || Ld->isIndexed())
2338 break;
2339 EVT MemVT = Ld->getMemoryVT();
2340 // The memory VT should be the same size as the element type.
2341 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2342 break;
2343 if (!IsProfitableToFold(Src, Node, Node) ||
2344 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2345 break;
2346
2347 SDValue VL;
2348 if (IsScalarMove) {
2349 // We could deal with more VL if we update the VSETVLI insert pass to
2350 // avoid introducing more VSETVLI.
2351 if (!isOneConstant(Node->getOperand(2)))
2352 break;
2353 selectVLOp(Node->getOperand(2), VL);
2354 } else
2355 selectVLOp(Node->getOperand(2), VL);
2356
2357 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2358 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2359
2360 // If VL=1, then we don't need to do a strided load and can just do a
2361 // regular load.
2362 bool IsStrided = !isOneConstant(VL);
2363
2364 // Only do a strided load if we have optimized zero-stride vector load.
2365 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2366 break;
2367
2368 SmallVector<SDValue> Operands = {
2369 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2370 Ld->getBasePtr()};
2371 if (IsStrided)
2372 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
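 // Using x0 as the stride makes every element read the same scalar
 // address, which is what turns the strided load into a splat.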
2374 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2375 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2376
2377 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2378 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2379 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2380 Log2SEW, static_cast<unsigned>(LMUL));
2381 MachineSDNode *Load =
2382 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2383 // Update the chain.
2384 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2385 // Record the mem-refs
2386 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2387 // Replace the splat with the vlse.
2388 ReplaceNode(Node, Load);
2389 return;
2390 }
2391 case ISD::PREFETCH:
2392 unsigned Locality = Node->getConstantOperandVal(3);
2393 if (Locality > 2)
2394 break;
2395
2396 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2397 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2398 MMO->setFlags(MachineMemOperand::MONonTemporal);
2399
2400 int NontemporalLevel = 0;
2401 switch (Locality) {
2402 case 0:
2403 NontemporalLevel = 3; // NTL.ALL
2404 break;
2405 case 1:
2406 NontemporalLevel = 1; // NTL.PALL
2407 break;
2408 case 2:
2409 NontemporalLevel = 0; // NTL.P1
2410 break;
2411 default:
2412 llvm_unreachable("unexpected locality value.");
2413 }
2414
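 // Each set bit of the level is then translated into one of the
 // target-specific nontemporal hint flags on the memory operand.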
2415 if (NontemporalLevel & 0b1)
2416 MMO->setFlags(MONontemporalBit0);
2417 if (NontemporalLevel & 0b10)
2418 MMO->setFlags(MONontemporalBit1);
2419 }
2420 break;
2421 }
2422
2423 // Select the default instruction.
2424 SelectCode(Node);
2425}
2426
2427bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2428 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2429 std::vector<SDValue> &OutOps) {
2430 // Always produce a register and immediate operand, as expected by
2431 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2432 switch (ConstraintID) {
2433 case InlineAsm::ConstraintCode::o:
2434 case InlineAsm::ConstraintCode::m: {
2435 SDValue Op0, Op1;
2436 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2437 assert(Found && "SelectAddrRegImm should always succeed");
2438 OutOps.push_back(Op0);
2439 OutOps.push_back(Op1);
2440 return false;
2441 }
2442 case InlineAsm::ConstraintCode::A:
2443 OutOps.push_back(Op);
2444 OutOps.push_back(
2445 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2446 return false;
2447 default:
2448 report_fatal_error("Unexpected asm memory constraint " +
2449 InlineAsm::getMemConstraintName(ConstraintID));
2450 }
2451
2452 return true;
2453}
2454
2455bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2456 SDValue &Offset) {
2457 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2458 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2459 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2460 return true;
2461 }
2462
2463 return false;
2464}
2465
2466// Select a frame index and an optional immediate offset from an ADD or OR.
2467bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2468 SDValue &Offset) {
2469 if (SelectAddrFrameIndex(Addr, Base, Offset))
2470 return true;
2471
2472 if (!CurDAG->isBaseWithConstantOffset(Addr))
2473 return false;
2474
2475 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2476 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2477 if (isInt<12>(CVal)) {
2478 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2479 Subtarget->getXLenVT());
2480 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2481 Subtarget->getXLenVT());
2482 return true;
2483 }
2484 }
2485
2486 return false;
2487}
2488
2489// Fold constant addresses.
2490static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2491 const MVT VT, const RISCVSubtarget *Subtarget,
2492 SDValue Addr, SDValue &Base, SDValue &Offset,
2493 bool IsPrefetch = false) {
2494 if (!isa<ConstantSDNode>(Addr))
2495 return false;
2496
2497 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2498
2499 // If the constant is a simm12, we can fold the whole constant and use X0 as
2500 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2501 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
2502 int64_t Lo12 = SignExtend64<12>(CVal);
2503 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
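 // After subtracting the sign-extended low 12 bits, Hi has its low 12 bits
 // clear, so (Hi >> 12) is exactly the 20-bit immediate LUI needs whenever
 // Hi fits in 32 bits.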
2504 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2505 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2506 return false;
2507
2508 if (Hi) {
2509 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2510 Base = SDValue(
2511 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2512 CurDAG->getTargetConstant(Hi20, DL, VT)),
2513 0);
2514 } else {
2515 Base = CurDAG->getRegister(RISCV::X0, VT);
2516 }
2517 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2518 return true;
2519 }
2520
2521 // Ask how constant materialization would handle this constant.
2522 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2523
2524 // If the last instruction would be an ADDI, we can fold its immediate and
2525 // emit the rest of the sequence as the base.
2526 if (Seq.back().getOpcode() != RISCV::ADDI)
2527 return false;
2528 Lo12 = Seq.back().getImm();
2529 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2530 return false;
2531
2532 // Drop the last instruction.
2533 Seq.pop_back();
2534 assert(!Seq.empty() && "Expected more instructions in sequence");
2535
2536 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2537 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2538 return true;
2539}
2540
2541// Is this ADD instruction only used as the base pointer of scalar loads and
2542// stores?
2543static bool isWorthFoldingAdd(SDValue Add) {
2544 for (auto *Use : Add->uses()) {
2545 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2546 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2547 Use->getOpcode() != ISD::ATOMIC_STORE)
2548 return false;
2549 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2550 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2551 VT != MVT::f64)
2552 return false;
2553 // Don't allow stores of the value. It must be used as the address.
2554 if (Use->getOpcode() == ISD::STORE &&
2555 cast<StoreSDNode>(Use)->getValue() == Add)
2556 return false;
2557 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2558 cast<AtomicSDNode>(Use)->getVal() == Add)
2559 return false;
2560 }
2561
2562 return true;
2563}
2564
2565bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2566 unsigned MaxShiftAmount,
2567 SDValue &Base, SDValue &Index,
2568 SDValue &Scale) {
2569 EVT VT = Addr.getSimpleValueType();
2570 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2571 SDValue &Shift) {
2572 uint64_t ShiftAmt = 0;
2573 Index = N;
2574
2575 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2576 // Only match shifts by a value in range [0, MaxShiftAmount].
2577 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2578 Index = N.getOperand(0);
2579 ShiftAmt = N.getConstantOperandVal(1);
2580 }
2581 }
2582
2583 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2584 return ShiftAmt != 0;
2585 };
2586
2587 if (Addr.getOpcode() == ISD::ADD) {
2588 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2589 SDValue AddrB = Addr.getOperand(0);
2590 if (AddrB.getOpcode() == ISD::ADD &&
2591 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2592 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2593 isInt<12>(C1->getSExtValue())) {
2594 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2595 SDValue C1Val =
2596 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2597 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2598 AddrB.getOperand(1), C1Val),
2599 0);
2600 return true;
2601 }
2602 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2603 Base = Addr.getOperand(1);
2604 return true;
2605 } else {
2606 UnwrapShl(Addr.getOperand(1), Index, Scale);
2607 Base = Addr.getOperand(0);
2608 return true;
2609 }
2610 } else if (UnwrapShl(Addr, Index, Scale)) {
2611 EVT VT = Addr.getValueType();
2612 Base = CurDAG->getRegister(RISCV::X0, VT);
2613 return true;
2614 }
2615
2616 return false;
2617}
2618
2619bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2620 SDValue &Offset, bool IsINX) {
2621 if (SelectAddrFrameIndex(Addr, Base, Offset))
2622 return true;
2623
2624 SDLoc DL(Addr);
2625 MVT VT = Addr.getSimpleValueType();
2626
2627 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2628 Base = Addr.getOperand(0);
2629 Offset = Addr.getOperand(1);
2630 return true;
2631 }
2632
2633 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2634 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2635 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2636 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2637 Base = Addr.getOperand(0);
2638 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2639 SDValue LoOperand = Base.getOperand(1);
2640 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2641 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2642 // (its low part, really), then we can rely on the alignment of that
2643 // variable to provide a margin of safety before low part can overflow
2644 // the 12 bits of the load/store offset. Check if CVal falls within
2645 // that margin; if so (low part + CVal) can't overflow.
2646 const DataLayout &DL = CurDAG->getDataLayout();
2647 Align Alignment = commonAlignment(
2648 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2649 if (CVal == 0 || Alignment > CVal) {
2650 int64_t CombinedOffset = CVal + GA->getOffset();
2651 Base = Base.getOperand(0);
2652 Offset = CurDAG->getTargetGlobalAddress(
2653 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2654 CombinedOffset, GA->getTargetFlags());
2655 return true;
2656 }
2657 }
2658 }
2659
2660 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2661 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2662 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2663 return true;
2664 }
2665 }
2666
2667 // Handle ADD with large immediates.
2668 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2669 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2670 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2671 "simm12 not already handled?");
2672
2673 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2674 // an ADDI for part of the offset and fold the rest into the load/store.
2675 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
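 // For example, an offset of 3000 becomes ADDI base, 2047 plus a
 // load/store immediate of 953; both pieces fit in simm12.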
2676 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2677 int64_t Adj = CVal < 0 ? -2048 : 2047;
2678 Base = SDValue(
2679 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2680 CurDAG->getTargetConstant(Adj, DL, VT)),
2681 0);
2682 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2683 return true;
2684 }
2685
2686 // For larger immediates, we might be able to save one instruction from
2687 // constant materialization by folding the Lo12 bits of the immediate into
2688 // the address. We should only do this if the ADD is only used by loads and
2689 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2690 // separately with the full materialized immediate creating extra
2691 // instructions.
2692 if (isWorthFoldingAdd(Addr) &&
2693 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2694 Offset)) {
2695 // Insert an ADD instruction with the materialized Hi52 bits.
2696 Base = SDValue(
2697 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2698 0);
2699 return true;
2700 }
2701 }
2702
2703 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2704 return true;
2705
2706 Base = Addr;
2707 Offset = CurDAG->getTargetConstant(0, DL, VT);
2708 return true;
2709}
2710
2711/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2712/// Offset should be all zeros.
2713bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2714 SDValue &Offset) {
2715 if (SelectAddrFrameIndex(Addr, Base, Offset))
2716 return true;
2717
2718 SDLoc DL(Addr);
2719 MVT VT = Addr.getSimpleValueType();
2720
2721 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2722 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2723 if (isInt<12>(CVal)) {
2724 Base = Addr.getOperand(0);
2725
2726 // Early-out if not a valid offset.
2727 if ((CVal & 0b11111) != 0) {
2728 Base = Addr;
2729 Offset = CurDAG->getTargetConstant(0, DL, VT);
2730 return true;
2731 }
2732
2733 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2734 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2735 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2736 return true;
2737 }
2738 }
2739
2740 // Handle ADD with large immediates.
2741 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2742 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2743 assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2744 "simm12 not already handled?");
2745
2746 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2747 // one instruction by folding adjustment (-2048 or 2016) into the address.
2748 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2749 int64_t Adj = CVal < 0 ? -2048 : 2016;
2750 int64_t AdjustedOffset = CVal - Adj;
2751 Base = SDValue(CurDAG->getMachineNode(
2752 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2753 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2754 0);
2755 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2756 return true;
2757 }
2758
2759 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2760 Offset, true)) {
2761 // Insert an ADD instruction with the materialized Hi52 bits.
2762 Base = SDValue(
2763 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2764 0);
2765 return true;
2766 }
2767 }
2768
2769 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2770 return true;
2771
2772 Base = Addr;
2773 Offset = CurDAG->getTargetConstant(0, DL, VT);
2774 return true;
2775}
2776
2777bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2778 SDValue &Offset) {
2779 if (Addr.getOpcode() != ISD::ADD)
2780 return false;
2781
2782 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2783 return false;
2784
2785 Base = Addr.getOperand(1);
2786 Offset = Addr.getOperand(0);
2787 return true;
2788}
2789
2790bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2791 SDValue &ShAmt) {
2792 ShAmt = N;
2793
2794 // Peek through zext.
2795 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2796 ShAmt = ShAmt.getOperand(0);
2797
2798 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2799 // amount. If there is an AND on the shift amount, we can bypass it if it
2800 // doesn't affect any of those bits.
2801 if (ShAmt.getOpcode() == ISD::AND &&
2802 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2803 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2804
2805 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2806 // mask that covers the bits needed to represent all shift amounts.
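 // For example, with ShiftWidth == 64 the mask is 63 (0b111111), the six
 // bits a 64-bit shift actually reads.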
2807 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2808 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2809
2810 if (ShMask.isSubsetOf(AndMask)) {
2811 ShAmt = ShAmt.getOperand(0);
2812 } else {
2813 // SimplifyDemandedBits may have optimized the mask so try restoring any
2814 // bits that are known zero.
2815 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2816 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2817 return true;
2818 ShAmt = ShAmt.getOperand(0);
2819 }
2820 }
2821
2822 if (ShAmt.getOpcode() == ISD::ADD &&
2823 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2824 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2825 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2826 // to avoid the ADD.
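 // For instance, a 64-bit shift by (X + 64) reads the same low six bits as
 // a shift by X.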
2827 if (Imm != 0 && Imm % ShiftWidth == 0) {
2828 ShAmt = ShAmt.getOperand(0);
2829 return true;
2830 }
2831 } else if (ShAmt.getOpcode() == ISD::SUB &&
2832 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2833 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2834 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2835 // generate a NEG instead of a SUB of a constant.
2836 if (Imm != 0 && Imm % ShiftWidth == 0) {
2837 SDLoc DL(ShAmt);
2838 EVT VT = ShAmt.getValueType();
2839 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2840 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2841 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2842 ShAmt.getOperand(1));
2843 ShAmt = SDValue(Neg, 0);
2844 return true;
2845 }
2846 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2847 // to generate a NOT instead of a SUB of a constant.
2848 if (Imm % ShiftWidth == ShiftWidth - 1) {
2849 SDLoc DL(ShAmt);
2850 EVT VT = ShAmt.getValueType();
2851 MachineSDNode *Not =
2852 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2853 CurDAG->getTargetConstant(-1, DL, VT));
2854 ShAmt = SDValue(Not, 0);
2855 return true;
2856 }
2857 }
2858
2859 return true;
2860}
2861
2862/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2863/// check for equality with 0. This function emits instructions that convert the
2864/// seteq/setne into something that can be compared with 0.
2865/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2866/// ISD::SETNE).
2867bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2868 SDValue &Val) {
2869 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2870 "Unexpected condition code!");
2871
2872 // We're looking for a setcc.
2873 if (N->getOpcode() != ISD::SETCC)
2874 return false;
2875
2876 // Must be an equality comparison.
2877 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2878 if (CCVal != ExpectedCCVal)
2879 return false;
2880
2881 SDValue LHS = N->getOperand(0);
2882 SDValue RHS = N->getOperand(1);
2883
2884 if (!LHS.getValueType().isScalarInteger())
2885 return false;
2886
2887 // If the RHS is 0, we don't need any extra instructions; return the LHS.
2888 if (isNullConstant(RHS)) {
2889 Val = LHS;
2890 return true;
2891 }
2892
2893 SDLoc DL(N);
2894
2895 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2896 int64_t CVal = C->getSExtValue();
2897 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2898 // non-zero otherwise.
2899 if (CVal == -2048) {
2900 Val =
2901 SDValue(CurDAG->getMachineNode(
2902 RISCV::XORI, DL, N->getValueType(0), LHS,
2903 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2904 0);
2905 return true;
2906 }
2907 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2908 // LHS is equal to the RHS and non-zero otherwise.
2909 if (isInt<12>(CVal) || CVal == 2048) {
2910 Val =
2911 SDValue(CurDAG->getMachineNode(
2912 RISCV::ADDI, DL, N->getValueType(0), LHS,
2913 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2914 0);
2915 return true;
2916 }
2917 }
2918
2919 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2920 // equal and a non-zero value if they aren't.
2921 Val = SDValue(
2922 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2923 return true;
2924}
2925
2926bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2927 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2928 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2929 Val = N.getOperand(0);
2930 return true;
2931 }
2932
2933 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2934 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2935 return N;
2936
2937 SDValue N0 = N.getOperand(0);
2938 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2939 N.getConstantOperandVal(1) == ShiftAmt &&
2940 N0.getConstantOperandVal(1) == ShiftAmt)
2941 return N0.getOperand(0);
2942
2943 return N;
2944 };
2945
2946 MVT VT = N.getSimpleValueType();
2947 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2948 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2949 return true;
2950 }
2951
2952 return false;
2953}
2954
2955bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2956 if (N.getOpcode() == ISD::AND) {
2957 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2958 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2959 Val = N.getOperand(0);
2960 return true;
2961 }
2962 }
2963 MVT VT = N.getSimpleValueType();
2964 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2965 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2966 Val = N;
2967 return true;
2968 }
2969
2970 return false;
2971}
2972
2973/// Look for various patterns that can be done with a SHL that can be folded
2974/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2975/// SHXADD we are trying to match.
2976bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2977 SDValue &Val) {
2978 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2979 SDValue N0 = N.getOperand(0);
2980
2981 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
2982 (LeftShift || N0.getOpcode() == ISD::SRL) &&
2983 isa<ConstantSDNode>(N0.getOperand(1))) {
2984 uint64_t Mask = N.getConstantOperandVal(1);
2985 unsigned C2 = N0.getConstantOperandVal(1);
2986
2987 unsigned XLen = Subtarget->getXLen();
2988 if (LeftShift)
2989 Mask &= maskTrailingZeros<uint64_t>(C2);
2990 else
2991 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2992
2993 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2994 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2995 // followed by a SHXADD with c3 for the X amount.
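 // For example, with ShAmt == 3, (and (shl y, 1), ~7) has c2 == 1 and
 // c3 == 3, so we emit (srli y, 2) and let SH3ADD supply the remaining
 // shift by 3.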
2996 if (isShiftedMask_64(Mask)) {
2997 unsigned Leading = XLen - llvm::bit_width(Mask);
2998 unsigned Trailing = llvm::countr_zero(Mask);
2999 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
3000 SDLoc DL(N);
3001 EVT VT = N.getValueType();
3002 Val = SDValue(CurDAG->getMachineNode(
3003 RISCV::SRLI, DL, VT, N0.getOperand(0),
3004 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
3005 0);
3006 return true;
3007 }
3008 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3009 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3010 // followed by a SHXADD using c3 for the X amount.
3011 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
3012 SDLoc DL(N);
3013 EVT VT = N.getValueType();
3014 Val = SDValue(
3015 CurDAG->getMachineNode(
3016 RISCV::SRLI, DL, VT, N0.getOperand(0),
3017 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
3018 0);
3019 return true;
3020 }
3021 }
3022 }
3023 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3024 (LeftShift || N.getOpcode() == ISD::SRL) &&
3025 isa<ConstantSDNode>(N.getOperand(1))) {
3026 SDValue N0 = N.getOperand(0);
3027 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3028 isa<ConstantSDNode>(N0.getOperand(1))) {
3029 uint64_t Mask = N0.getConstantOperandVal(1);
3030 if (isShiftedMask_64(Mask)) {
3031 unsigned C1 = N.getConstantOperandVal(1);
3032 unsigned XLen = Subtarget->getXLen();
3033 unsigned Leading = XLen - llvm::bit_width(Mask);
3034 unsigned Trailing = llvm::countr_zero(Mask);
3035 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3036 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3037 if (LeftShift && Leading == 32 && Trailing > 0 &&
3038 (Trailing + C1) == ShAmt) {
3039 SDLoc DL(N);
3040 EVT VT = N.getValueType();
3041 Val = SDValue(CurDAG->getMachineNode(
3042 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3043 CurDAG->getTargetConstant(Trailing, DL, VT)),
3044 0);
3045 return true;
3046 }
3047 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3048 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3049 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3050 (Trailing - C1) == ShAmt) {
3051 SDLoc DL(N);
3052 EVT VT = N.getValueType();
3053 Val = SDValue(CurDAG->getMachineNode(
3054 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3055 CurDAG->getTargetConstant(Trailing, DL, VT)),
3056 0);
3057 return true;
3058 }
3059 }
3060 }
3061 }
3062
3063 return false;
3064}
3065
3066/// Look for various patterns that can be done with a SHL that can be folded
3067/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3068/// SHXADD_UW we are trying to match.
3069bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3070 SDValue &Val) {
3071 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3072 N.hasOneUse()) {
3073 SDValue N0 = N.getOperand(0);
3074 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3075 N0.hasOneUse()) {
3076 uint64_t Mask = N.getConstantOperandVal(1);
3077 unsigned C2 = N0.getConstantOperandVal(1);
3078
3079 Mask &= maskTrailingZeros<uint64_t>(C2);
3080
3081 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3082 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3083 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3084 if (isShiftedMask_64(Mask)) {
3085 unsigned Leading = llvm::countl_zero(Mask);
3086 unsigned Trailing = llvm::countr_zero(Mask);
3087 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3088 SDLoc DL(N);
3089 EVT VT = N.getValueType();
3090 Val = SDValue(CurDAG->getMachineNode(
3091 RISCV::SLLI, DL, VT, N0.getOperand(0),
3092 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3093 0);
3094 return true;
3095 }
3096 }
3097 }
3098 }
3099
3100 return false;
3101}
3102
3103static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3104 unsigned Bits,
3105 const TargetInstrInfo *TII) {
3106 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3107
3108 if (!MCOpcode)
3109 return false;
3110
3111 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3112 const uint64_t TSFlags = MCID.TSFlags;
3113 if (!RISCVII::hasSEWOp(TSFlags))
3114 return false;
3115 assert(RISCVII::hasVLOp(TSFlags));
3116
3117 bool HasGlueOp = User->getGluedNode() != nullptr;
3118 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3119 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3120 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3121 unsigned VLIdx =
3122 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3123 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3124
3125 if (UserOpNo == VLIdx)
3126 return false;
3127
3128 auto NumDemandedBits =
3129 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3130 return NumDemandedBits && Bits >= *NumDemandedBits;
3131}
3132
3133// Return true if all users of this SDNode* only consume the lower \p Bits.
3134// This can be used to form W instructions for add/sub/mul/shl even when the
3135// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3136// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3137// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3138// the add/sub/mul/shl to become non-W instructions. By checking the users we
3139// may be able to use a W instruction and CSE with the other instruction if
3140// this has happened. We could try to detect that the CSE opportunity exists
3141// before doing this, but that would be more complicated.
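// For example, if an (add X, Y) is consumed only by an ADDW, that user reads
// just the low 32 bits, so the add itself can safely be selected as ADDW.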
3142bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3143 const unsigned Depth) const {
3144 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3145 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3146 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3147 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3148 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3149 isa<ConstantSDNode>(Node) || Depth != 0) &&
3150 "Unexpected opcode");
3151
3152 if (Depth >= SelectionDAG::MaxRecursionDepth)
3153 return false;
3154
3155 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3156 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3157 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3158 return false;
3159
3160 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3161 SDNode *User = *UI;
3162 // Users of this node should have already been instruction selected
3163 if (!User->isMachineOpcode())
3164 return false;
3165
3166 // TODO: Add more opcodes?
3167 switch (User->getMachineOpcode()) {
3168 default:
3169 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3170 break;
3171 return false;
3172 case RISCV::ADDW:
3173 case RISCV::ADDIW:
3174 case RISCV::SUBW:
3175 case RISCV::MULW:
3176 case RISCV::SLLW:
3177 case RISCV::SLLIW:
3178 case RISCV::SRAW:
3179 case RISCV::SRAIW:
3180 case RISCV::SRLW:
3181 case RISCV::SRLIW:
3182 case RISCV::DIVW:
3183 case RISCV::DIVUW:
3184 case RISCV::REMW:
3185 case RISCV::REMUW:
3186 case RISCV::ROLW:
3187 case RISCV::RORW:
3188 case RISCV::RORIW:
3189 case RISCV::CLZW:
3190 case RISCV::CTZW:
3191 case RISCV::CPOPW:
3192 case RISCV::SLLI_UW:
3193 case RISCV::FMV_W_X:
3194 case RISCV::FCVT_H_W:
3195 case RISCV::FCVT_H_W_INX:
3196 case RISCV::FCVT_H_WU:
3197 case RISCV::FCVT_H_WU_INX:
3198 case RISCV::FCVT_S_W:
3199 case RISCV::FCVT_S_W_INX:
3200 case RISCV::FCVT_S_WU:
3201 case RISCV::FCVT_S_WU_INX:
3202 case RISCV::FCVT_D_W:
3203 case RISCV::FCVT_D_W_INX:
3204 case RISCV::FCVT_D_WU:
3205 case RISCV::FCVT_D_WU_INX:
3206 case RISCV::TH_REVW:
3207 case RISCV::TH_SRRIW:
3208 if (Bits >= 32)
3209 break;
3210 return false;
3211 case RISCV::SLL:
3212 case RISCV::SRA:
3213 case RISCV::SRL:
3214 case RISCV::ROL:
3215 case RISCV::ROR:
3216 case RISCV::BSET:
3217 case RISCV::BCLR:
3218 case RISCV::BINV:
3219 // Shift amount operands only use log2(Xlen) bits.
3220 if (UI.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3221 break;
3222 return false;
3223 case RISCV::SLLI:
3224 // SLLI only uses the lower (XLen - ShAmt) bits.
3225 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3226 break;
3227 return false;
3228 case RISCV::ANDI:
3229 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3230 break;
3231 goto RecCheck;
3232 case RISCV::ORI: {
3233 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3234 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3235 break;
3236 [[fallthrough]];
3237 }
3238 case RISCV::AND:
3239 case RISCV::OR:
3240 case RISCV::XOR:
3241 case RISCV::XORI:
3242 case RISCV::ANDN:
3243 case RISCV::ORN:
3244 case RISCV::XNOR:
3245 case RISCV::SH1ADD:
3246 case RISCV::SH2ADD:
3247 case RISCV::SH3ADD:
3248 RecCheck:
3249 if (hasAllNBitUsers(User, Bits, Depth + 1))
3250 break;
3251 return false;
3252 case RISCV::SRLI: {
3253 unsigned ShAmt = User->getConstantOperandVal(1);
3254 // If we are shifting right by less than Bits, and users don't demand any
3255 // bits that were shifted into [Bits-1:0], then we can consider this as an
3256 // N-Bit user.
3257 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3258 break;
3259 return false;
3260 }
3261 case RISCV::SEXT_B:
3262 case RISCV::PACKH:
3263 if (Bits >= 8)
3264 break;
3265 return false;
3266 case RISCV::SEXT_H:
3267 case RISCV::FMV_H_X:
3268 case RISCV::ZEXT_H_RV32:
3269 case RISCV::ZEXT_H_RV64:
3270 case RISCV::PACKW:
3271 if (Bits >= 16)
3272 break;
3273 return false;
3274 case RISCV::PACK:
3275 if (Bits >= (Subtarget->getXLen() / 2))
3276 break;
3277 return false;
3278 case RISCV::ADD_UW:
3279 case RISCV::SH1ADD_UW:
3280 case RISCV::SH2ADD_UW:
3281 case RISCV::SH3ADD_UW:
3282 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3283 // 32 bits.
3284 if (UI.getOperandNo() == 0 && Bits >= 32)
3285 break;
3286 return false;
3287 case RISCV::SB:
3288 if (UI.getOperandNo() == 0 && Bits >= 8)
3289 break;
3290 return false;
3291 case RISCV::SH:
3292 if (UI.getOperandNo() == 0 && Bits >= 16)
3293 break;
3294 return false;
3295 case RISCV::SW:
3296 if (UI.getOperandNo() == 0 && Bits >= 32)
3297 break;
3298 return false;
3299 }
3300 }
3301
3302 return true;
3303}
3304
3305// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3306bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3307 SDValue &Shl2) {
3308 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3309 int64_t Offset = C->getSExtValue();
3310 int64_t Shift;
3311 for (Shift = 0; Shift < 4; Shift++)
3312 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3313 break;
3314
3315 // Constant cannot be encoded.
3316 if (Shift == 4)
3317 return false;
3318
3319 EVT Ty = N->getValueType(0);
3320 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3321 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3322 return true;
3323 }
3324
3325 return false;
3326}
3327
3328// Select VL as a 5 bit immediate or a value that will become a register. This
3329// allows us to choose between VSETIVLI or VSETVLI later.
3330bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3331 auto *C = dyn_cast<ConstantSDNode>(N);
3332 if (C && isUInt<5>(C->getZExtValue())) {
3333 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3334 N->getValueType(0));
3335 } else if (C && C->isAllOnes()) {
3336 // Treat all ones as VLMax.
3337 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3338 N->getValueType(0));
3339 } else if (isa<RegisterSDNode>(N) &&
3340 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3341 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3342 // as the register class. Convert X0 to a special immediate to pass the
3343 // MachineVerifier. This is recognized specially by the vsetvli insertion
3344 // pass.
3345 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3346 N->getValueType(0));
3347 } else {
3348 VL = N;
3349 }
3350
3351 return true;
3352}
3353
3354static SDValue findVSplat(SDValue N) {
3355 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3356 if (!N.getOperand(0).isUndef())
3357 return SDValue();
3358 N = N.getOperand(1);
3359 }
3360 SDValue Splat = N;
3361 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3362 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3363 !Splat.getOperand(0).isUndef())
3364 return SDValue();
3365 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3366 return Splat;
3367}
3368
3369bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3370 SDValue Splat = findVSplat(N);
3371 if (!Splat)
3372 return false;
3373
3374 SplatVal = Splat.getOperand(1);
3375 return true;
3376}
3377
3378static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3379 SelectionDAG &DAG,
3380 const RISCVSubtarget &Subtarget,
3381 std::function<bool(int64_t)> ValidateImm) {
3382 SDValue Splat = findVSplat(N);
3383 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3384 return false;
3385
3386 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3387 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3388 "Unexpected splat operand type");
3389
3390 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3391 // type is wider than the resulting vector element type: an implicit
3392 // truncation first takes place. Therefore, perform a manual
3393 // truncation/sign-extension in order to ignore any truncated bits and catch
3394 // any zero-extended immediate.
3395 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3396 // sign-extending to (XLenVT -1).
3397 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3398
3399 int64_t SplatImm = SplatConst.getSExtValue();
3400
3401 if (!ValidateImm(SplatImm))
3402 return false;
3403
3404 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3405 return true;
3406}
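// Illustrative sketch (not part of the upstream source): the truncate-then-
// sign-extend step above for plain integers. A splat of (i8 -1) can arrive as
// the XLen-wide constant 255; only after truncating to the element width and
// sign-extending does it match simm5. The helper name is hypothetical.
#include <cstdint>
static int64_t normalizeSplatImm(uint64_t XLenConst, unsigned EltBits) {
  uint64_t Mask = EltBits == 64 ? ~0ULL : (1ULL << EltBits) - 1;
  uint64_t Truncated = XLenConst & Mask;             // drop truncated bits
  uint64_t SignBit = 1ULL << (EltBits - 1);
  return (int64_t)((Truncated ^ SignBit) - SignBit); // sign-extend EltBits
}
// normalizeSplatImm(255, 8) == -1, which isInt<5> accepts;
// normalizeSplatImm(255, 16) == 255, which it rejects.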
3407
3408bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3409 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3410 [](int64_t Imm) { return isInt<5>(Imm); });
3411}
3412
3413bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3414 return selectVSplatImmHelper(
3415 N, SplatVal, *CurDAG, *Subtarget,
3416 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3417}
3418
3419bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3420 SDValue &SplatVal) {
3421 return selectVSplatImmHelper(
3422 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3423 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3424 });
3425}
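// Note (not part of the upstream source): the "Plus1" predicates above accept
// exactly Imm in [-15, 16], i.e. the immediates for which Imm - 1 still fits
// simm5, consistent with patterns that encode Imm - 1 in the instruction.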
3426
3427bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3428 SDValue &SplatVal) {
3429 return selectVSplatImmHelper(
3430 N, SplatVal, *CurDAG, *Subtarget,
3431 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3432}
3433
3434bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3435 auto IsExtOrTrunc = [](SDValue N) {
3436 switch (N->getOpcode()) {
3437 case ISD::SIGN_EXTEND:
3438 case ISD::ZERO_EXTEND:
3439 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3440 // inactive elements will be undef.
3441 case RISCVISD::TRUNCATE_VECTOR_VL:
3442 case RISCVISD::VSEXT_VL:
3443 case RISCVISD::VZEXT_VL:
3444 return true;
3445 default:
3446 return false;
3447 }
3448 };
3449
3450 // We can have multiple nested nodes, so unravel them all if needed.
3451 while (IsExtOrTrunc(N)) {
3452 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3453 return false;
3454 N = N->getOperand(0);
3455 }
3456
3457 return selectVSplat(N, SplatVal);
3458}
3459
3460bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3461 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3462 if (!CFP)
3463 return false;
3464 const APFloat &APF = CFP->getValueAPF();
3465 // td can handle +0.0 already.
3466 if (APF.isPosZero())
3467 return false;
3468
3469 MVT VT = CFP->getSimpleValueType(0);
3470
3471 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3472 // the returned pair is true) we still prefer FLI + FNEG over immediate
3473 // materialization as the latter might generate a longer instruction sequence.
3474 if (static_cast<const RISCVTargetLowering *>(TLI)
3475 ->getLegalZfaFPImm(APF, VT)
3476 .first >= 0)
3477 return false;
3478
3479 MVT XLenVT = Subtarget->getXLenVT();
3480 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3481 assert(APF.isNegZero() && "Unexpected constant.");
3482 return false;
3483 }
3484 SDLoc DL(N);
3485 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3486 *Subtarget);
3487 return true;
3488}
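// Illustrative sketch (not part of the upstream source): selectFPImm above
// falls back to materializing the raw IEEE-754 bit pattern in an integer
// register (to be moved into an FPR); this shows the bit patterns involved.
// The helper name is hypothetical.
#include <cstdint>
#include <cstring>
static uint32_t fpBitsF32(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // same idea as bitcastToAPInt()
  return Bits;
}
// fpBitsF32(-0.0f) == 0x80000000; fpBitsF32(1.0f) == 0x3f800000.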
3489
3490bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3491 SDValue &Imm) {
3492 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3493 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3494
3495 if (!isInt<5>(ImmVal))
3496 return false;
3497
3498 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3499 return true;
3500 }
3501
3502 return false;
3503}
3504
3505// Try to remove sext.w if the input is a W instruction or can be made into
3506// a W instruction cheaply.
3507bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3508 // Look for the sext.w pattern, addiw rd, rs1, 0.
3509 if (N->getMachineOpcode() != RISCV::ADDIW ||
3510 !isNullConstant(N->getOperand(1)))
3511 return false;
3512
3513 SDValue N0 = N->getOperand(0);
3514 if (!N0.isMachineOpcode())
3515 return false;
3516
3517 switch (N0.getMachineOpcode()) {
3518 default:
3519 break;
3520 case RISCV::ADD:
3521 case RISCV::ADDI:
3522 case RISCV::SUB:
3523 case RISCV::MUL:
3524 case RISCV::SLLI: {
3525 // Convert sext.w+add/sub/mul to their W instructions. This will create
3526 // a new independent instruction. This improves latency.
3527 unsigned Opc;
3528 switch (N0.getMachineOpcode()) {
3529 default:
3530 llvm_unreachable("Unexpected opcode!");
3531 case RISCV::ADD: Opc = RISCV::ADDW; break;
3532 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3533 case RISCV::SUB: Opc = RISCV::SUBW; break;
3534 case RISCV::MUL: Opc = RISCV::MULW; break;
3535 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3536 }
3537
3538 SDValue N00 = N0.getOperand(0);
3539 SDValue N01 = N0.getOperand(1);
3540
3541 // Shift amount needs to be uimm5.
3542 if (N0.getMachineOpcode() == RISCV::SLLI &&
3543 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3544 break;
3545
3546 SDNode *Result =
3547 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3548 N00, N01);
3549 ReplaceUses(N, Result);
3550 return true;
3551 }
3552 case RISCV::ADDW:
3553 case RISCV::ADDIW:
3554 case RISCV::SUBW:
3555 case RISCV::MULW:
3556 case RISCV::SLLIW:
3557 case RISCV::PACKW:
3558 case RISCV::TH_MULAW:
3559 case RISCV::TH_MULAH:
3560 case RISCV::TH_MULSW:
3561 case RISCV::TH_MULSH:
3562 if (N0.getValueType() == MVT::i32)
3563 break;
3564
3565 // Result is already sign extended; just remove the sext.w.
3566 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3567 ReplaceUses(N, N0.getNode());
3568 return true;
3569 }
3570
3571 return false;
3572}
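// Illustrative example (not part of the upstream source): the peephole above
// rewrites, for instance,
//   add    a0, a1, a2
//   sext.w a0, a0          # i.e. addiw a0, a0, 0
// into the single W-form instruction
//   addw   a0, a1, a2
// and simply deletes the sext.w when its input is already a W instruction
// (addw, mulw, ...) whose result is sign-extended by definition.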
3573
3574// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3575// that's glued to the pseudo. This tries to look up the value that was copied
3576// to V0.
3577static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3578 // Check that we're using V0 as a mask register.
3579 if (!isa<RegisterSDNode>(MaskOp) ||
3580 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3581 return SDValue();
3582
3583 // The glued user defines V0.
3584 const auto *Glued = GlueOp.getNode();
3585
3586 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3587 return SDValue();
3588
3589 // Check that we're defining V0 as a mask register.
3590 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3591 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3592 return SDValue();
3593
3594 SDValue MaskSetter = Glued->getOperand(2);
3595
3596 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3597 // from an extract_subvector or insert_subvector.
3598 if (MaskSetter->isMachineOpcode() &&
3599 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3600 MaskSetter = MaskSetter->getOperand(0);
3601
3602 return MaskSetter;
3603}
3604
3605static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3606 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3607 SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3608 if (!MaskSetter)
3609 return false;
3610
3611 const auto IsVMSet = [](unsigned Opc) {
3612 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3613 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3614 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3615 Opc == RISCV::PseudoVMSET_M_B8;
3616 };
3617
3618 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3619 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3620 // assume that it's all-ones? Same applies to its VL.
3621 return MaskSetter->isMachineOpcode() &&
3622 IsVMSet(MaskSetter.getMachineOpcode());
3623}
3624
3625// Return true if we can make sure mask of N is all-ones mask.
3626static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3627 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3628 N->getOperand(N->getNumOperands() - 1));
3629}
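// Illustrative example (not part of the upstream source): after ISel a masked
// RVV pseudo typically looks like, schematically,
//   t1: v0 = PseudoVMSET_M_B8 vl, sew
//   t2: ch, glue = CopyToReg t0, V0, t1
//   t3: ... = PseudoVADD_VV_M1_MASK passthru, a, b, V0, vl, sew, policy, t2:1
// getMaskSetter follows the V0 operand through the glued CopyToReg back to t1,
// and usesAllOnesMask then checks that t1 is one of the VMSET_M_B* pseudos.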
3630
3631static bool isImplicitDef(SDValue V) {
3632 if (!V.isMachineOpcode())
3633 return false;
3634 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3635 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3636 if (!isImplicitDef(V.getOperand(I)))
3637 return false;
3638 return true;
3639 }
3640 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3641}
3642
3643// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3644// corresponding "unmasked" pseudo versions. The mask we're interested in will
3645// take the form of a V0 physical register operand, with a glued
3646// register-setting instruction.
3647bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3648 const RISCV::RISCVMaskedPseudoInfo *I =
3649 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3650 if (!I)
3651 return false;
3652
3653 unsigned MaskOpIdx = I->MaskOpIdx;
3654 if (!usesAllOnesMask(N, MaskOpIdx))
3655 return false;
3656
3657 // There are two classes of pseudos in the table - compares and
3658 // everything else. See the comment on RISCVMaskedPseudo for details.
3659 const unsigned Opc = I->UnmaskedPseudo;
3660 const MCInstrDesc &MCID = TII->get(Opc);
3661 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3662#ifndef NDEBUG
3663 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3664 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3665 RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3666 "Masked and unmasked pseudos are inconsistent");
3667 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3668 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3669#endif
3670
3671 SmallVector<SDValue, 8> Ops;
3672 // Skip the passthru operand at index 0 if !UseTUPseudo.
3673 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3674 // Skip the mask, and the Glue.
3675 SDValue Op = N->getOperand(I);
3676 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3677 continue;
3678 Ops.push_back(Op);
3679 }
3680
3681 // Transitively apply any node glued to our new node.
3682 const auto *Glued = N->getGluedNode();
3683 if (auto *TGlued = Glued->getGluedNode())
3684 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3685
3686 MachineSDNode *Result =
3687 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3688
3689 if (!N->memoperands_empty())
3690 CurDAG->setNodeMemRefs(Result, N->memoperands());
3691
3692 Result->setFlags(N->getFlags());
3693 ReplaceUses(N, Result);
3694
3695 return true;
3696}
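// Illustrative example (not part of the upstream source): with a known
// all-ones mask the peephole above rewrites, schematically,
//   %r = PseudoVADD_VV_M1_MASK %passthru, %a, %b, V0, %vl, %sew, %policy
// into the unmasked form
//   %r = PseudoVADD_VV_M1 %passthru, %a, %b, %vl, %sew, %policy
// dropping the mask operand, and also dropping the passthru when the unmasked
// pseudo has no policy operand (!UseTUPseudo above).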
3697
3698static bool IsVMerge(SDNode *N) {
3699 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3700}
3701
3702static bool IsVMv(SDNode *N) {
3703 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3704}
3705
3706static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3707 switch (LMUL) {
3708 case RISCVII::LMUL_F8:
3709 return RISCV::PseudoVMSET_M_B1;
3710 case RISCVII::LMUL_F4:
3711 return RISCV::PseudoVMSET_M_B2;
3712 case RISCVII::LMUL_F2:
3713 return RISCV::PseudoVMSET_M_B4;
3714 case RISCVII::LMUL_1:
3715 return RISCV::PseudoVMSET_M_B8;
3716 case RISCVII::LMUL_2:
3717 return RISCV::PseudoVMSET_M_B16;
3718 case RISCVII::LMUL_4:
3719 return RISCV::PseudoVMSET_M_B32;
3720 case RISCVII::LMUL_8:
3721 return RISCV::PseudoVMSET_M_B64;
3722 case RISCVII::LMUL_RESERVED:
3723 llvm_unreachable("Unexpected LMUL");
3724 }
3725 llvm_unreachable("Unknown VLMUL enum");
3726}
3727
3728// Try to fold away VMERGE_VVM instructions into their true operands:
3729//
3730// %true = PseudoVADD_VV ...
3731// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3732// ->
3733// %x = PseudoVADD_VV_MASK %false, ..., %mask
3734//
3735// We can only fold if vmerge's passthru operand, vmerge's false operand and
3736// %true's passthru operand (if it has one) are the same. This is because we
3737// have to consolidate them into one passthru operand in the result.
3738//
3739// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3740// mask is all ones.
3741//
3742// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3743// VMERGE_VVM with an all ones mask.
3744//
3745// The resulting VL is the minimum of the two VLs.
3746//
3747// The resulting policy is the effective policy the vmerge would have had,
3748// i.e. whether or not its passthru operand was implicit-def.
3749bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3750 SDValue Passthru, False, True, VL, Mask, Glue;
3751 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3752 if (IsVMv(N)) {
3753 Passthru = N->getOperand(0);
3754 False = N->getOperand(0);
3755 True = N->getOperand(1);
3756 VL = N->getOperand(2);
3757 // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3758 // mask later below.
3759 } else {
3760 assert(IsVMerge(N));
3761 Passthru = N->getOperand(0);
3762 False = N->getOperand(1);
3763 True = N->getOperand(2);
3764 Mask = N->getOperand(3);
3765 VL = N->getOperand(4);
3766 // We always have a glue node for the mask at v0.
3767 Glue = N->getOperand(N->getNumOperands() - 1);
3768 }
3769 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3770 assert(!Glue || Glue.getValueType() == MVT::Glue);
3771
3772 // If the EEW of True is different from vmerge's SEW, then we can't fold.
3773 if (True.getSimpleValueType() != N->getSimpleValueType(0))
3774 return false;
3775
3776 // We require that either passthru and false are the same, or that passthru
3777 // is undefined.
3778 if (Passthru != False && !isImplicitDef(Passthru))
3779 return false;
3780
3781 assert(True.getResNo() == 0 &&
3782 "Expect True is the first output of an instruction.");
3783
3784 // N needs to be the only user of True.
3785 if (!True.hasOneUse())
3786 return false;
3787
3788 if (!True.isMachineOpcode())
3789 return false;
3790
3791 unsigned TrueOpc = True.getMachineOpcode();
3792 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3793 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3794 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3795
3796 bool IsMasked = false;
3797 const RISCV::RISCVMaskedPseudoInfo *Info =
3798 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3799 if (!Info && HasTiedDest) {
3800 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3801 IsMasked = true;
3802 }
3803 assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");
3804
3805 if (!Info)
3806 return false;
3807
3808 // If True has a passthru operand then it needs to be the same as vmerge's
3809 // False, since False will be used for the result's passthru operand.
3810 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3811 SDValue PassthruOpTrue = True->getOperand(0);
3812 if (False != PassthruOpTrue)
3813 return false;
3814 }
3815
3816 // If True is masked then the vmerge must have either the same mask or an all
3817 // 1s mask, since we're going to keep the mask from True.
3818 if (IsMasked && Mask) {
3819 // FIXME: Support mask agnostic True instruction which would have an
3820 // undef passthru operand.
3821 SDValue TrueMask =
3822 getMaskSetter(True->getOperand(Info->MaskOpIdx),
3823 True->getOperand(True->getNumOperands() - 1));
3824 assert(TrueMask);
3825 if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
3826 return false;
3827 }
3828
3829 // Skip if True has side effect.
3830 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3831 return false;
3832
3833 // The last operand of a masked instruction may be glued.
3834 bool HasGlueOp = True->getGluedNode() != nullptr;
3835
3836 // The chain operand may exist either before the glued operands or in the last
3837 // position.
3838 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3839 bool HasChainOp =
3840 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3841
3842 if (HasChainOp) {
3843 // Avoid creating cycles in the DAG. We must ensure that none of the other
3844 // operands depend on True through its chain.
3845 SmallVector<const SDNode *, 4> LoopWorklist;
3846 SmallPtrSet<const SDNode *, 16> Visited;
3847 LoopWorklist.push_back(False.getNode());
3848 if (Mask)
3849 LoopWorklist.push_back(Mask.getNode());
3850 LoopWorklist.push_back(VL.getNode());
3851 if (Glue)
3852 LoopWorklist.push_back(Glue.getNode());
3853 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3854 return false;
3855 }
3856
3857 // The vector policy operand may be present for masked intrinsics
3858 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3859 unsigned TrueVLIndex =
3860 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3861 SDValue TrueVL = True.getOperand(TrueVLIndex);
3862 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3863
3864 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3865 if (LHS == RHS)
3866 return LHS;
3867 if (isAllOnesConstant(LHS))
3868 return RHS;
3869 if (isAllOnesConstant(RHS))
3870 return LHS;
3871 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3872 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3873 if (!CLHS || !CRHS)
3874 return SDValue();
3875 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3876 };
3877
3878 // Because N and True must have the same passthru operand (or True's operand
3879 // is implicit_def), the "effective" body is the minimum of their VLs.
3880 SDValue OrigVL = VL;
3881 VL = GetMinVL(TrueVL, VL);
3882 if (!VL)
3883 return false;
3884
3885 // Some operations produce different elementwise results depending on the
3886 // active elements, like viota.m or vredsum. This transformation is illegal
3887 // for these if we change the active elements (i.e. mask or VL).
3888 const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
3889 if (RISCVII::activeElementsAffectResult(TrueBaseMCID.TSFlags)) {
3890 if (Mask && !usesAllOnesMask(Mask, Glue))
3891 return false;
3892 if (TrueVL != VL)
3893 return false;
3894 }
3895
3896 // If we end up changing the VL or mask of True, then we need to make sure it
3897 // doesn't raise any observable fp exceptions, since changing the active
3898 // elements will affect how fflags is set.
3899 if (TrueVL != VL || !IsMasked)
3900 if (mayRaiseFPException(True.getNode()) &&
3901 !True->getFlags().hasNoFPExcept())
3902 return false;
3903
3904 SDLoc DL(N);
3905
3906 // From the preconditions we checked above, we know the mask and thus glue
3907 // for the result node will be taken from True.
3908 if (IsMasked) {
3909 Mask = True->getOperand(Info->MaskOpIdx);
3910 Glue = True->getOperand(True->getNumOperands() - 1);
3911 assert(Glue.getValueType() == MVT::Glue);
3912 }
3913 // If we end up using the vmerge mask but the vmerge is actually a vmv.v.v,
3914 // create an all-ones mask to use.
3915 else if (IsVMv(N)) {
3916 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3917 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3918 ElementCount EC = N->getValueType(0).getVectorElementCount();
3919 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3920
3921 SDValue AllOnesMask =
3922 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3923 SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3924 RISCV::V0, AllOnesMask, SDValue());
3925 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3926 Glue = MaskCopy.getValue(1);
3927 }
3928
3929 unsigned MaskedOpc = Info->MaskedPseudo;
3930#ifndef NDEBUG
3931 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3932 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
3933 "Expected instructions with mask have policy operand.");
3934 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3935 MCOI::TIED_TO) == 0 &&
3936 "Expected instructions with mask have a tied dest.");
3937#endif
3938
3939 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
3940 // operand is undefined.
3941 //
3942 // However, if the VL became smaller than what the vmerge had originally, then
3943 // elements past VL that were previously in the vmerge's body will have moved
3944 // to the tail. In that case we always need to use tail undisturbed to
3945 // preserve them.
3946 bool MergeVLShrunk = VL != OrigVL;
3947 uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
3948 ? RISCVII::TAIL_AGNOSTIC
3949 : /*TUMU*/ 0;
3950 SDValue PolicyOp =
3951 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3952
3953
3954 SmallVector<SDValue, 8> Ops;
3955 Ops.push_back(False);
3956
3957 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3958 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3959 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3960 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3961
3962 Ops.push_back(Mask);
3963
3964 // For unmasked "VOp" with a rounding mode operand, that is, interfaces like
3965 // (..., rm, vl) or (..., rm, vl, policy).
3966 // Its masked version is (..., vm, rm, vl, policy).
3967 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3968 if (HasRoundingMode)
3969 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3970
3971 Ops.append({VL, SEW, PolicyOp});
3972
3973 // Result node should have chain operand of True.
3974 if (HasChainOp)
3975 Ops.push_back(True.getOperand(TrueChainOpIdx));
3976
3977 // Add the glue for the CopyToReg of mask->v0.
3978 Ops.push_back(Glue);
3979
3980 MachineSDNode *Result =
3981 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3982 Result->setFlags(True->getFlags());
3983
3984 if (!cast<MachineSDNode>(True)->memoperands_empty())
3985 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3986
3987 // Replace vmerge.vvm node by Result.
3988 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3989
3990 // Replace the other values of True, e.g. chain and VL.
3991 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3992 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3993
3994 return true;
3995}
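// Illustrative sketch (not part of the upstream source): the VL folding above
// keeps the smaller of the two VLs, treating an all-ones VL as VLMAX. This
// mirrors the GetMinVL lambda for the case where both VLs are known integers,
// with -1 standing in for VLMAX; the helper name is hypothetical.
#include <cstdint>
static int64_t minVL(int64_t LHS, int64_t RHS) {
  const int64_t VLMax = -1;
  if (LHS == RHS)
    return LHS;
  if (LHS == VLMax)
    return RHS;
  if (RHS == VLMax)
    return LHS;
  return LHS <= RHS ? LHS : RHS;
}
// minVL(4, -1) == 4; minVL(-1, -1) == -1 (VLMAX); minVL(2, 8) == 2.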
3996
3997bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3998 bool MadeChange = false;
3999 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4000
4001 while (Position != CurDAG->allnodes_begin()) {
4002 SDNode *N = &*--Position;
4003 if (N->use_empty() || !N->isMachineOpcode())
4004 continue;
4005
4006 if (IsVMerge(N) || IsVMv(N))
4007 MadeChange |= performCombineVMergeAndVOps(N);
4008 }
4009 return MadeChange;
4010}
4011
4012/// If our passthru is an implicit_def, use noreg instead. This sidesteps
4013/// issues with MachineCSE not being able to CSE expressions with
4014/// IMPLICIT_DEF operands while preserving the semantic intent. See
4015/// pr64282 for context. Note that this transform is the last one
4016/// performed at ISEL DAG to DAG.
4017bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4018 bool MadeChange = false;
4019 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4020
4021 while (Position != CurDAG->allnodes_begin()) {
4022 SDNode *N = &*--Position;
4023 if (N->use_empty() || !N->isMachineOpcode())
4024 continue;
4025
4026 const unsigned Opc = N->getMachineOpcode();
4027 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4028 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4029 !isImplicitDef(N->getOperand(0)))
4030 continue;
4031
4032 SmallVector<SDValue, 8> Ops;
4033 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4034 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4035 SDValue Op = N->getOperand(I);
4036 Ops.push_back(Op);
4037 }
4038
4039 MachineSDNode *Result =
4040 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4041 Result->setFlags(N->getFlags());
4042 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4043 ReplaceUses(N, Result);
4044 MadeChange = true;
4045 }
4046 return MadeChange;
4047}
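// Illustrative example (not part of the upstream source): the rewrite above
// turns, schematically,
//   %pt = IMPLICIT_DEF
//   %r  = PseudoVADD_VV_M1 %pt, %a, %b, %vl, %sew, %policy
// into
//   %r  = PseudoVADD_VV_M1 $noreg, %a, %b, %vl, %sew, %policy
// so that MachineCSE can treat otherwise identical instructions as equal.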
4048
4049
4050// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4051// for instruction scheduling.
4052FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4053 CodeGenOptLevel OptLevel) {
4054 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4055}
4056
4057char RISCVDAGToDAGISelLegacy::ID = 0;
4058
4059RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4060 CodeGenOptLevel OptLevel)
4061 : SelectionDAGISelLegacy(
4062 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4063