RISCVISelDAGToDAG.cpp
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
22#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
31 static cl::opt<bool> UsePseudoMovImm(
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#include "RISCVGenSearchableTables.inc"
47} // namespace llvm::RISCV
48
49 void RISCVDAGToDAGISel::PreprocessISelDAG() {
50 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
51
52 bool MadeChange = false;
53 while (Position != CurDAG->allnodes_begin()) {
54 SDNode *N = &*--Position;
55 if (N->use_empty())
56 continue;
57
58 SDValue Result;
59 switch (N->getOpcode()) {
60 case ISD::SPLAT_VECTOR: {
61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63 MVT VT = N->getSimpleValueType(0);
64 unsigned Opc =
65 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66 SDLoc DL(N);
67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 SDValue Src = N->getOperand(0);
69 if (VT.isInteger())
70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71 N->getOperand(0));
72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73 break;
74 }
75 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77 // load. Done after lowering and combining so that we have a chance to
78 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
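// The reload below is a strided load (riscv_vlse) whose stride operand is
// X0, i.e. stride 0, so the single 64-bit value written to the stack slot is
// broadcast to every element of the result vector.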
79 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 MVT VT = N->getSimpleValueType(0);
81 SDValue Passthru = N->getOperand(0);
82 SDValue Lo = N->getOperand(1);
83 SDValue Hi = N->getOperand(2);
84 SDValue VL = N->getOperand(3);
85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87 "Unexpected VTs!");
88 MachineFunction &MF = CurDAG->getMachineFunction();
89 SDLoc DL(N);
90
91 // Create temporary stack for each expanding node.
92 SDValue StackSlot =
93 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
96
97 SDValue Chain = CurDAG->getEntryNode();
98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100 SDValue OffsetSlot =
101 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103 Align(8));
104
105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108 SDValue IntID =
109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110 SDValue Ops[] = {Chain,
111 IntID,
112 Passthru,
113 StackSlot,
114 CurDAG->getRegister(RISCV::X0, MVT::i64),
115 VL};
116
117 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118 MVT::i64, MPI, Align(8),
119 MachineMemOperand::MOLoad);
120 break;
121 }
122 }
123
124 if (Result) {
125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126 LLVM_DEBUG(N->dump(CurDAG));
127 LLVM_DEBUG(dbgs() << "\nNew: ");
128 LLVM_DEBUG(Result->dump(CurDAG));
129 LLVM_DEBUG(dbgs() << "\n");
130
131 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132 MadeChange = true;
133 }
134 }
135
136 if (MadeChange)
137 CurDAG->RemoveDeadNodes();
138 }
139
140 void RISCVDAGToDAGISel::PostprocessISelDAG() {
141 HandleSDNode Dummy(CurDAG->getRoot());
142 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
143
144 bool MadeChange = false;
145 while (Position != CurDAG->allnodes_begin()) {
146 SDNode *N = &*--Position;
147 // Skip dead nodes and any non-machine opcodes.
148 if (N->use_empty() || !N->isMachineOpcode())
149 continue;
150
151 MadeChange |= doPeepholeSExtW(N);
152
153 // FIXME: This is here only because the VMerge transform doesn't
154 // know how to handle masked true inputs. Once that has been moved
155 // to post-ISEL, this can be deleted as well.
156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157 }
158
159 CurDAG->setRoot(Dummy.getValue());
160
161 MadeChange |= doPeepholeMergeVVMFold();
162
163 // After we're done with everything else, convert IMPLICIT_DEF
164 // passthru operands to NoRegister. This is required to workaround
165 // an optimization deficiency in MachineCSE. This really should
166 // be merged back into each of the patterns (i.e. there's no good
167 // reason not to go directly to NoReg), but is being done this way
168 // to allow easy backporting.
169 MadeChange |= doPeepholeNoRegPassThru();
170
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
173 }
174
175 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (const RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202}
203
204static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
207
208 // Use a rematerializable pseudo instruction for short sequences if enabled.
209 if (Seq.size() == 2 && UsePseudoMovImm)
210 return SDValue(
211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212 CurDAG->getTargetConstant(Imm, DL, VT)),
213 0);
214
215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216 // worst an LUI+ADDIW. This will require an extra register, but avoids a
217 // constant pool.
218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219 // low and high 32 bits are the same and bit 31 and 63 are set.
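// For example, 0x0000123400001234 can be built as X = 0x1234 (LUI+ADDIW)
// followed by (ADD (SLLI X, 32), X). With Zba, 0x8000123480001234 can be
// built as X = 0x80001234 (sign-extended by LUI+ADDIW) followed by
// (ADD_UW X, (SLLI X, 32)), since ADD_UW zero-extends the low copy of X.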
220 if (Seq.size() > 3) {
221 unsigned ShiftAmt, AddOpc;
222 RISCVMatInt::InstSeq SeqLo =
223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226
227 SDValue SLLI = SDValue(
228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230 0);
231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232 }
233 }
234
235 // Otherwise, use the original sequence.
236 return selectImmSeq(CurDAG, DL, VT, Seq);
237}
238
239 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
240 unsigned NF, RISCVII::VLMUL LMUL) {
241 static const unsigned M1TupleRegClassIDs[] = {
242 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
243 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
244 RISCV::VRN8M1RegClassID};
245 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
246 RISCV::VRN3M2RegClassID,
247 RISCV::VRN4M2RegClassID};
248
249 assert(Regs.size() >= 2 && Regs.size() <= 8);
250
251 unsigned RegClassID;
252 unsigned SubReg0;
253 switch (LMUL) {
254 default:
255 llvm_unreachable("Invalid LMUL.");
256 case RISCVII::VLMUL::LMUL_F8:
257 case RISCVII::VLMUL::LMUL_F4:
258 case RISCVII::VLMUL::LMUL_F2:
259 case RISCVII::VLMUL::LMUL_1:
260 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
261 "Unexpected subreg numbering");
262 SubReg0 = RISCV::sub_vrm1_0;
263 RegClassID = M1TupleRegClassIDs[NF - 2];
264 break;
265 case RISCVII::VLMUL::LMUL_2:
266 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
267 "Unexpected subreg numbering");
268 SubReg0 = RISCV::sub_vrm2_0;
269 RegClassID = M2TupleRegClassIDs[NF - 2];
270 break;
271 case RISCVII::VLMUL::LMUL_4:
272 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
273 "Unexpected subreg numbering");
274 SubReg0 = RISCV::sub_vrm4_0;
275 RegClassID = RISCV::VRN2M4RegClassID;
276 break;
277 }
278
279 SDLoc DL(Regs[0]);
280 SmallVector<SDValue, 8> Ops;
281
282 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
283
284 for (unsigned I = 0; I < Regs.size(); ++I) {
285 Ops.push_back(Regs[I]);
286 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
287 }
288 SDNode *N =
289 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
290 return SDValue(N, 0);
291}
292
293 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
294 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
295 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
296 bool IsLoad, MVT *IndexVT) {
297 SDValue Chain = Node->getOperand(0);
298 SDValue Glue;
299
300 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
301
302 if (IsStridedOrIndexed) {
303 Operands.push_back(Node->getOperand(CurOp++)); // Index.
304 if (IndexVT)
305 *IndexVT = Operands.back()->getSimpleValueType(0);
306 }
307
308 if (IsMasked) {
309 // Mask needs to be copied to V0.
310 SDValue Mask = Node->getOperand(CurOp++);
311 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
312 Glue = Chain.getValue(1);
313 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
314 }
315 SDValue VL;
316 selectVLOp(Node->getOperand(CurOp++), VL);
317 Operands.push_back(VL);
318
319 MVT XLenVT = Subtarget->getXLenVT();
320 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
321 Operands.push_back(SEWOp);
322
323 // At the IR layer, all the masked load intrinsics have policy operands,
324 // none of the others do. All have passthru operands. For our pseudos,
325 // all loads have policy operands.
326 if (IsLoad) {
327 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
328 if (IsMasked)
329 Policy = Node->getConstantOperandVal(CurOp++);
330 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
331 Operands.push_back(PolicyOp);
332 }
333
334 Operands.push_back(Chain); // Chain.
335 if (Glue)
336 Operands.push_back(Glue);
337}
338
339void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
340 bool IsStrided) {
341 SDLoc DL(Node);
342 unsigned NF = Node->getNumValues() - 1;
343 MVT VT = Node->getSimpleValueType(0);
344 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
345 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
346
347 unsigned CurOp = 2;
349
350 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
351 Node->op_begin() + CurOp + NF);
352 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
353 Operands.push_back(Merge);
354 CurOp += NF;
355
356 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
357 Operands, /*IsLoad=*/true);
358
359 const RISCV::VLSEGPseudo *P =
360 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
361 static_cast<unsigned>(LMUL));
362 MachineSDNode *Load =
363 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
364
365 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
366 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
367
368 SDValue SuperReg = SDValue(Load, 0);
369 for (unsigned I = 0; I < NF; ++I) {
370 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
371 ReplaceUses(SDValue(Node, I),
372 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
373 }
374
375 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
376 CurDAG->RemoveDeadNode(Node);
377}
378
379void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
380 SDLoc DL(Node);
381 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
382 MVT VT = Node->getSimpleValueType(0);
383 MVT XLenVT = Subtarget->getXLenVT();
384 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
385 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
386
387 unsigned CurOp = 2;
389
390 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
391 Node->op_begin() + CurOp + NF);
392 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
393 Operands.push_back(MaskedOff);
394 CurOp += NF;
395
396 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
397 /*IsStridedOrIndexed*/ false, Operands,
398 /*IsLoad=*/true);
399
400 const RISCV::VLSEGPseudo *P =
401 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
402 Log2SEW, static_cast<unsigned>(LMUL));
403 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
404 XLenVT, MVT::Other, Operands);
405
406 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
407 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
408
409 SDValue SuperReg = SDValue(Load, 0);
410 for (unsigned I = 0; I < NF; ++I) {
411 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
412 ReplaceUses(SDValue(Node, I),
413 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
414 }
415
416 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
417 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
418 CurDAG->RemoveDeadNode(Node);
419}
420
421void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
422 bool IsOrdered) {
423 SDLoc DL(Node);
424 unsigned NF = Node->getNumValues() - 1;
425 MVT VT = Node->getSimpleValueType(0);
426 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
427 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
428
429 unsigned CurOp = 2;
431
432 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
433 Node->op_begin() + CurOp + NF);
434 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
435 Operands.push_back(MaskedOff);
436 CurOp += NF;
437
438 MVT IndexVT;
439 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
440 /*IsStridedOrIndexed*/ true, Operands,
441 /*IsLoad=*/true, &IndexVT);
442
443 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
444 "Element count mismatch");
445
446 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 report_fatal_error("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Load =
456 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
457
458 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
459 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
460
461 SDValue SuperReg = SDValue(Load, 0);
462 for (unsigned I = 0; I < NF; ++I) {
463 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
464 ReplaceUses(SDValue(Node, I),
465 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
466 }
467
468 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
469 CurDAG->RemoveDeadNode(Node);
470}
471
472void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
473 bool IsStrided) {
474 SDLoc DL(Node);
475 unsigned NF = Node->getNumOperands() - 4;
476 if (IsStrided)
477 NF--;
478 if (IsMasked)
479 NF--;
480 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
481 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
482 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
483 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
484 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
485
486 SmallVector<SDValue, 8> Operands;
487 Operands.push_back(StoreVal);
488 unsigned CurOp = 2 + NF;
489
490 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
491 Operands);
492
493 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
494 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
495 MachineSDNode *Store =
496 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
497
498 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
499 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
500
501 ReplaceNode(Node, Store);
502}
503
504void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
505 bool IsOrdered) {
506 SDLoc DL(Node);
507 unsigned NF = Node->getNumOperands() - 5;
508 if (IsMasked)
509 --NF;
510 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
511 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
512 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
513 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
514 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
515
516 SmallVector<SDValue, 8> Operands;
517 Operands.push_back(StoreVal);
518 unsigned CurOp = 2 + NF;
519
520 MVT IndexVT;
521 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
522 /*IsStridedOrIndexed*/ true, Operands,
523 /*IsLoad=*/false, &IndexVT);
524
525 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
526 "Element count mismatch");
527
528 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
529 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
530 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
531 report_fatal_error("The V extension does not support EEW=64 for index "
532 "values when XLEN=32");
533 }
534 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
535 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
536 static_cast<unsigned>(IndexLMUL));
537 MachineSDNode *Store =
538 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
539
540 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
541 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
542
543 ReplaceNode(Node, Store);
544}
545
546 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
547 if (!Subtarget->hasVInstructions())
548 return;
549
550 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
551
552 SDLoc DL(Node);
553 MVT XLenVT = Subtarget->getXLenVT();
554
555 unsigned IntNo = Node->getConstantOperandVal(0);
556
557 assert((IntNo == Intrinsic::riscv_vsetvli ||
558 IntNo == Intrinsic::riscv_vsetvlimax) &&
559 "Unexpected vsetvli intrinsic");
560
561 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
562 unsigned Offset = (VLMax ? 1 : 2);
563
564 assert(Node->getNumOperands() == Offset + 2 &&
565 "Unexpected number of operands");
566
567 unsigned SEW =
568 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
569 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
570 Node->getConstantOperandVal(Offset + 1) & 0x7);
571
572 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
573 /*MaskAgnostic*/ true);
574 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
575
576 SDValue VLOperand;
577 unsigned Opcode = RISCV::PseudoVSETVLI;
578 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
579 if (auto VLEN = Subtarget->getRealVLen())
580 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
581 VLMax = true;
582 }
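// For example, with VLEN=128, SEW=32 and LMUL=2, VLMAX = 128*2/32 = 8, so a
// constant AVL of 8 is equivalent to requesting VLMAX and can be selected as
// PseudoVSETVLIX0 (vsetvli with rs1 = x0).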
583 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
584 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
585 Opcode = RISCV::PseudoVSETVLIX0;
586 } else {
587 VLOperand = Node->getOperand(1);
588
589 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
590 uint64_t AVL = C->getZExtValue();
591 if (isUInt<5>(AVL)) {
592 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
593 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
594 XLenVT, VLImm, VTypeIOp));
595 return;
596 }
597 }
598 }
599
600 ReplaceNode(Node,
601 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
602}
603
604 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
605 MVT VT = Node->getSimpleValueType(0);
606 unsigned Opcode = Node->getOpcode();
607 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
608 "Unexpected opcode");
609 SDLoc DL(Node);
610
611 // For operations of the form (x << C1) op C2, check if we can use
612 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
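// For example, (or (shl X, 8), 0x4d00) becomes (shl (ori X, 0x4d), 8); the
// shifted immediate fits in 12 bits, so no separate constant materialization
// is needed.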
613 SDValue N0 = Node->getOperand(0);
614 SDValue N1 = Node->getOperand(1);
615
616 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
617 if (!Cst)
618 return false;
619
620 int64_t Val = Cst->getSExtValue();
621
622 // Check if immediate can already use ANDI/ORI/XORI.
623 if (isInt<12>(Val))
624 return false;
625
626 SDValue Shift = N0;
627
628 // If Val is simm32 and we have a sext_inreg from i32, then the binop
629 // produces at least 33 sign bits. We can peek through the sext_inreg and use
630 // a SLLIW at the end.
631 bool SignExt = false;
632 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
633 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
634 SignExt = true;
635 Shift = N0.getOperand(0);
636 }
637
638 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
639 return false;
640
641 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
642 if (!ShlCst)
643 return false;
644
645 uint64_t ShAmt = ShlCst->getZExtValue();
646
647 // Make sure that we don't change the operation by removing bits.
648 // This only matters for OR and XOR, AND is unaffected.
649 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
650 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
651 return false;
652
653 int64_t ShiftedVal = Val >> ShAmt;
654 if (!isInt<12>(ShiftedVal))
655 return false;
656
657 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
658 if (SignExt && ShAmt >= 32)
659 return false;
660
661 // Ok, we can reorder to get a smaller immediate.
662 unsigned BinOpc;
663 switch (Opcode) {
664 default: llvm_unreachable("Unexpected opcode");
665 case ISD::AND: BinOpc = RISCV::ANDI; break;
666 case ISD::OR: BinOpc = RISCV::ORI; break;
667 case ISD::XOR: BinOpc = RISCV::XORI; break;
668 }
669
670 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
671
672 SDNode *BinOp =
673 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
674 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
675 SDNode *SLLI =
676 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
677 CurDAG->getTargetConstant(ShAmt, DL, VT));
678 ReplaceNode(Node, SLLI);
679 return true;
680}
681
682 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
683 // Only supported with XTHeadBb at the moment.
684 if (!Subtarget->hasVendorXTHeadBb())
685 return false;
686
687 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
688 if (!N1C)
689 return false;
690
691 SDValue N0 = Node->getOperand(0);
692 if (!N0.hasOneUse())
693 return false;
694
695 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
696 MVT VT) {
697 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
698 CurDAG->getTargetConstant(Msb, DL, VT),
699 CurDAG->getTargetConstant(Lsb, DL, VT));
700 };
701
702 SDLoc DL(Node);
703 MVT VT = Node->getSimpleValueType(0);
704 const unsigned RightShAmt = N1C->getZExtValue();
705
706 // Transform (sra (shl X, C1) C2) with C1 < C2
707 // -> (TH.EXT X, msb, lsb)
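// For example, with XLEN=64, (sra (shl X, 16), 20) becomes th.ext X, 47, 4:
// msb = 64 - 16 - 1 = 47 and lsb = 20 - 16 = 4, i.e. a signed extract of
// bits [47:4].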
708 if (N0.getOpcode() == ISD::SHL) {
709 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
710 if (!N01C)
711 return false;
712
713 const unsigned LeftShAmt = N01C->getZExtValue();
714 // Make sure that this is a bitfield extraction (i.e., the shift-right
715 // amount can not be less than the left-shift).
716 if (LeftShAmt > RightShAmt)
717 return false;
718
719 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
720 const unsigned Msb = MsbPlusOne - 1;
721 const unsigned Lsb = RightShAmt - LeftShAmt;
722
723 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724 ReplaceNode(Node, TH_EXT);
725 return true;
726 }
727
728 // Transform (sra (sext_inreg X, _), C) ->
729 // (TH.EXT X, msb, lsb)
730 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
731 unsigned ExtSize =
732 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
733
734 // ExtSize of 32 should use sraiw via tablegen pattern.
735 if (ExtSize == 32)
736 return false;
737
738 const unsigned Msb = ExtSize - 1;
739 const unsigned Lsb = RightShAmt;
740
741 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
742 ReplaceNode(Node, TH_EXT);
743 return true;
744 }
745
746 return false;
747}
748
749 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
750 // Target does not support indexed loads.
751 if (!Subtarget->hasVendorXTHeadMemIdx())
752 return false;
753
754 LoadSDNode *Ld = cast<LoadSDNode>(Node);
755 ISD::MemIndexedMode AM = Ld->getAddressingMode();
756 if (AM == ISD::UNINDEXED)
757 return false;
758
759 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
760 if (!C)
761 return false;
762
763 EVT LoadVT = Ld->getMemoryVT();
764 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
765 "Unexpected addressing mode");
766 bool IsPre = AM == ISD::PRE_INC;
767 bool IsPost = AM == ISD::POST_INC;
768 int64_t Offset = C->getSExtValue();
769
770 // The constants that can be encoded in the THeadMemIdx instructions
771 // are of the form (sign_extend(imm5) << imm2).
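// That is, a multiple of 1, 2, 4 or 8 of a value in [-16, 15]: e.g. an
// offset of 48 = 12 << 2 is encodable, while 50 is not.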
772 int64_t Shift;
773 for (Shift = 0; Shift < 4; Shift++)
774 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
775 break;
776
777 // Constant cannot be encoded.
778 if (Shift == 4)
779 return false;
780
781 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
782 unsigned Opcode;
783 if (LoadVT == MVT::i8 && IsPre)
784 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
785 else if (LoadVT == MVT::i8 && IsPost)
786 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
787 else if (LoadVT == MVT::i16 && IsPre)
788 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
789 else if (LoadVT == MVT::i16 && IsPost)
790 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
791 else if (LoadVT == MVT::i32 && IsPre)
792 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
793 else if (LoadVT == MVT::i32 && IsPost)
794 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
795 else if (LoadVT == MVT::i64 && IsPre)
796 Opcode = RISCV::TH_LDIB;
797 else if (LoadVT == MVT::i64 && IsPost)
798 Opcode = RISCV::TH_LDIA;
799 else
800 return false;
801
802 EVT Ty = Ld->getOffset().getValueType();
803 SDValue Ops[] = {Ld->getBasePtr(),
804 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
805 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
806 Ld->getChain()};
807 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
808 Ld->getValueType(1), MVT::Other, Ops);
809
810 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
811 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
812
813 ReplaceNode(Node, New);
814
815 return true;
816}
817
818 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
819 if (!Subtarget->hasVInstructions())
820 return;
821
822 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
823
824 SDLoc DL(Node);
825 unsigned IntNo = Node->getConstantOperandVal(1);
826
827 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
828 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
829 "Unexpected SiFive VCIX intrinsic");
830
831 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
832 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
833 SDValue SEWOp =
834 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
835 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
836 Node->getOperand(4), Node->getOperand(5),
837 Node->getOperand(8), SEWOp,
838 Node->getOperand(0)};
839
840 unsigned Opcode;
841 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
842 switch (LMulSDNode->getSExtValue()) {
843 case 5:
844 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
845 : RISCV::PseudoVC_I_SE_MF8;
846 break;
847 case 6:
848 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
849 : RISCV::PseudoVC_I_SE_MF4;
850 break;
851 case 7:
852 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
853 : RISCV::PseudoVC_I_SE_MF2;
854 break;
855 case 0:
856 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
857 : RISCV::PseudoVC_I_SE_M1;
858 break;
859 case 1:
860 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
861 : RISCV::PseudoVC_I_SE_M2;
862 break;
863 case 2:
864 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
865 : RISCV::PseudoVC_I_SE_M4;
866 break;
867 case 3:
868 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
869 : RISCV::PseudoVC_I_SE_M8;
870 break;
871 }
872
873 ReplaceNode(Node, CurDAG->getMachineNode(
874 Opcode, DL, Node->getSimpleValueType(0), Operands));
875}
876
877 void RISCVDAGToDAGISel::Select(SDNode *Node) {
878 // If we have a custom node, we have already selected.
879 if (Node->isMachineOpcode()) {
880 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
881 Node->setNodeId(-1);
882 return;
883 }
884
885 // Instruction Selection not handled by the auto-generated tablegen selection
886 // should be handled here.
887 unsigned Opcode = Node->getOpcode();
888 MVT XLenVT = Subtarget->getXLenVT();
889 SDLoc DL(Node);
890 MVT VT = Node->getSimpleValueType(0);
891
892 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
893
894 switch (Opcode) {
895 case ISD::Constant: {
896 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
897 auto *ConstNode = cast<ConstantSDNode>(Node);
898 if (ConstNode->isZero()) {
899 SDValue New =
900 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
901 ReplaceNode(Node, New.getNode());
902 return;
903 }
904 int64_t Imm = ConstNode->getSExtValue();
905 // If the upper XLen-16 bits are not used, try to convert this to a simm12
906 // by sign extending bit 15.
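// For example, if only the low 16 bits of 0xFFF0 are consumed, it can be
// treated as -16 and materialized with a single ADDI instead of LUI+ADDI.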
907 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
908 hasAllHUsers(Node))
909 Imm = SignExtend64<16>(Imm);
910 // If the upper 32-bits are not used try to convert this into a simm32 by
911 // sign extending bit 32.
912 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
913 Imm = SignExtend64<32>(Imm);
914
915 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
916 return;
917 }
918 case ISD::ConstantFP: {
919 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
920 auto [FPImm, NeedsFNeg] =
921 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
922 VT);
923 if (FPImm >= 0) {
924 unsigned Opc;
925 unsigned FNegOpc;
926 switch (VT.SimpleTy) {
927 default:
928 llvm_unreachable("Unexpected size");
929 case MVT::f16:
930 Opc = RISCV::FLI_H;
931 FNegOpc = RISCV::FSGNJN_H;
932 break;
933 case MVT::f32:
934 Opc = RISCV::FLI_S;
935 FNegOpc = RISCV::FSGNJN_S;
936 break;
937 case MVT::f64:
938 Opc = RISCV::FLI_D;
939 FNegOpc = RISCV::FSGNJN_D;
940 break;
941 }
942 SDNode *Res = CurDAG->getMachineNode(
943 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
944 if (NeedsFNeg)
945 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
946 SDValue(Res, 0));
947
948 ReplaceNode(Node, Res);
949 return;
950 }
951
952 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
953 SDValue Imm;
954 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
955 // create an integer immediate.
956 if (APF.isPosZero() || NegZeroF64)
957 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
958 else
959 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
960 *Subtarget);
961
962 bool HasZdinx = Subtarget->hasStdExtZdinx();
963 bool Is64Bit = Subtarget->is64Bit();
964 unsigned Opc;
965 switch (VT.SimpleTy) {
966 default:
967 llvm_unreachable("Unexpected size");
968 case MVT::bf16:
969 assert(Subtarget->hasStdExtZfbfmin());
970 Opc = RISCV::FMV_H_X;
971 break;
972 case MVT::f16:
973 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
974 break;
975 case MVT::f32:
976 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
977 break;
978 case MVT::f64:
979 // For RV32, we can't move from a GPR, we need to convert instead. This
980 // should only happen for +0.0 and -0.0.
981 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
982 if (Is64Bit)
983 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
984 else
985 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
986 break;
987 }
988
989 SDNode *Res;
990 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
991 Res = CurDAG->getMachineNode(
992 Opc, DL, VT, Imm,
993 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
994 else
995 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
996
997 // For f64 -0.0, we need to insert a fneg.d idiom.
998 if (NegZeroF64) {
999 Opc = RISCV::FSGNJN_D;
1000 if (HasZdinx)
1001 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1002 Res =
1003 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1004 }
1005
1006 ReplaceNode(Node, Res);
1007 return;
1008 }
1009 case RISCVISD::BuildPairF64: {
1010 if (!Subtarget->hasStdExtZdinx())
1011 break;
1012
1013 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1014
1015 SDValue Ops[] = {
1016 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1017 Node->getOperand(0),
1018 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1019 Node->getOperand(1),
1020 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1021
1022 SDNode *N =
1023 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1024 ReplaceNode(Node, N);
1025 return;
1026 }
1027 case RISCVISD::SplitF64: {
1028 if (Subtarget->hasStdExtZdinx()) {
1029 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1030
1031 if (!SDValue(Node, 0).use_empty()) {
1032 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1033 Node->getOperand(0));
1034 ReplaceUses(SDValue(Node, 0), Lo);
1035 }
1036
1037 if (!SDValue(Node, 1).use_empty()) {
1038 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1039 Node->getOperand(0));
1040 ReplaceUses(SDValue(Node, 1), Hi);
1041 }
1042
1043 CurDAG->RemoveDeadNode(Node);
1044 return;
1045 }
1046
1047 if (!Subtarget->hasStdExtZfa())
1048 break;
1049 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1050 "Unexpected subtarget");
1051
1052 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1053 if (!SDValue(Node, 0).use_empty()) {
1054 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1055 Node->getOperand(0));
1056 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1057 }
1058 if (!SDValue(Node, 1).use_empty()) {
1059 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1060 Node->getOperand(0));
1061 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1062 }
1063
1064 CurDAG->RemoveDeadNode(Node);
1065 return;
1066 }
1067 case ISD::SHL: {
1068 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1069 if (!N1C)
1070 break;
1071 SDValue N0 = Node->getOperand(0);
1072 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1073 !isa<ConstantSDNode>(N0.getOperand(1)))
1074 break;
1075 unsigned ShAmt = N1C->getZExtValue();
1076 uint64_t Mask = N0.getConstantOperandVal(1);
1077
1078 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1079 // 32 leading zeros and C3 trailing zeros.
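// For example, (shl (and X, 0xFFFFFF00), 2) becomes (slli (srliw X, 8), 10),
// avoiding materialization of the 0xFFFFFF00 mask.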
1080 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1081 unsigned XLen = Subtarget->getXLen();
1082 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1083 unsigned TrailingZeros = llvm::countr_zero(Mask);
1084 if (TrailingZeros > 0 && LeadingZeros == 32) {
1085 SDNode *SRLIW = CurDAG->getMachineNode(
1086 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1087 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1088 SDNode *SLLI = CurDAG->getMachineNode(
1089 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1090 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1091 ReplaceNode(Node, SLLI);
1092 return;
1093 }
1094 }
1095 break;
1096 }
1097 case ISD::SRL: {
1098 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1099 if (!N1C)
1100 break;
1101 SDValue N0 = Node->getOperand(0);
1102 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1103 break;
1104 unsigned ShAmt = N1C->getZExtValue();
1105 uint64_t Mask = N0.getConstantOperandVal(1);
1106
1107 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1108 // 32 leading zeros and C3 trailing zeros.
1109 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1110 unsigned XLen = Subtarget->getXLen();
1111 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1112 unsigned TrailingZeros = llvm::countr_zero(Mask);
1113 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1114 SDNode *SRLIW = CurDAG->getMachineNode(
1115 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1116 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1117 SDNode *SLLI = CurDAG->getMachineNode(
1118 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1119 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1120 ReplaceNode(Node, SLLI);
1121 return;
1122 }
1123 }
1124
1125 // Optimize (srl (and X, C2), C) ->
1126 // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1127 // Where C2 is a mask with C3 trailing ones.
1128 // Taking into account that the C2 may have had lower bits unset by
1129 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1130 // This pattern occurs when type legalizing right shifts for types with
1131 // less than XLen bits.
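// For example, with XLen=64, (srl (and X, 0x7FF), 4) becomes
// (srli (slli X, 53), 57), extracting bits [10:4] of X without materializing
// the mask.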
1132 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1133 if (!isMask_64(Mask))
1134 break;
1135 unsigned TrailingOnes = llvm::countr_one(Mask);
1136 if (ShAmt >= TrailingOnes)
1137 break;
1138 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1139 if (TrailingOnes == 32) {
1140 SDNode *SRLI = CurDAG->getMachineNode(
1141 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1142 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1143 ReplaceNode(Node, SRLI);
1144 return;
1145 }
1146
1147 // Only do the remaining transforms if the AND has one use.
1148 if (!N0.hasOneUse())
1149 break;
1150
1151 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1152 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1153 SDNode *BEXTI = CurDAG->getMachineNode(
1154 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1155 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1156 ReplaceNode(Node, BEXTI);
1157 return;
1158 }
1159
1160 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1161 SDNode *SLLI =
1162 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1163 CurDAG->getTargetConstant(LShAmt, DL, VT));
1164 SDNode *SRLI = CurDAG->getMachineNode(
1165 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1166 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1167 ReplaceNode(Node, SRLI);
1168 return;
1169 }
1170 case ISD::SRA: {
1171 if (trySignedBitfieldExtract(Node))
1172 return;
1173
1174 // Optimize (sra (sext_inreg X, i16), C) ->
1175 // (srai (slli X, (XLen-16)), (XLen-16) + C)
1176 // And (sra (sext_inreg X, i8), C) ->
1177 // (srai (slli X, (XLen-8)), (XLen-8) + C)
1178 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1179 // This transform matches the code we get without Zbb. The shifts are more
1180 // compressible, and this can help expose CSE opportunities in the sdiv by
1181 // constant optimization.
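// For example, with XLen=64, (sra (sext_inreg X, i16), 3) becomes
// (srai (slli X, 48), 51).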
1182 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1183 if (!N1C)
1184 break;
1185 SDValue N0 = Node->getOperand(0);
1186 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1187 break;
1188 unsigned ShAmt = N1C->getZExtValue();
1189 unsigned ExtSize =
1190 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1191 // ExtSize of 32 should use sraiw via tablegen pattern.
1192 if (ExtSize >= 32 || ShAmt >= ExtSize)
1193 break;
1194 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1195 SDNode *SLLI =
1196 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1197 CurDAG->getTargetConstant(LShAmt, DL, VT));
1198 SDNode *SRAI = CurDAG->getMachineNode(
1199 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1200 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1201 ReplaceNode(Node, SRAI);
1202 return;
1203 }
1204 case ISD::OR:
1205 case ISD::XOR:
1206 if (tryShrinkShlLogicImm(Node))
1207 return;
1208
1209 break;
1210 case ISD::AND: {
1211 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1212 if (!N1C)
1213 break;
1214 uint64_t C1 = N1C->getZExtValue();
1215 const bool isC1Mask = isMask_64(C1);
1216 const bool isC1ANDI = isInt<12>(C1);
1217
1218 SDValue N0 = Node->getOperand(0);
1219
1220 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1221 SDValue X, unsigned Msb,
1222 unsigned Lsb) {
1223 if (!Subtarget->hasVendorXTHeadBb())
1224 return false;
1225
1226 SDNode *TH_EXTU = CurDAG->getMachineNode(
1227 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1228 CurDAG->getTargetConstant(Lsb, DL, VT));
1229 ReplaceNode(Node, TH_EXTU);
1230 return true;
1231 };
1232
1233 bool LeftShift = N0.getOpcode() == ISD::SHL;
1234 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1235 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1236 if (!C)
1237 break;
1238 unsigned C2 = C->getZExtValue();
1239 unsigned XLen = Subtarget->getXLen();
1240 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1241
1242 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1243 // shift pair might offer more compression opportunities.
1244 // TODO: We could check for C extension here, but we don't have many lit
1245 // tests with the C extension enabled so not checking gets better
1246 // coverage.
1247 // TODO: What if ANDI faster than shift?
1248 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1249
1250 // Clear irrelevant bits in the mask.
1251 if (LeftShift)
1252 C1 &= maskTrailingZeros<uint64_t>(C2);
1253 else
1254 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1255
1256 // Some transforms should only be done if the shift has a single use or
1257 // the AND would become (srli (slli X, 32), 32)
1258 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1259
1260 SDValue X = N0.getOperand(0);
1261
1262 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1263 // with c3 leading zeros.
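// For example, (and (srl X, 4), 0xFF) has c3 = 56 leading zeros in the mask,
// so it becomes (srli (slli X, 52), 56) when neither the SRLIW special cases
// nor a bitfield extract apply.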
1264 if (!LeftShift && isC1Mask) {
1265 unsigned Leading = XLen - llvm::bit_width(C1);
1266 if (C2 < Leading) {
1267 // If the number of leading zeros is C2+32 this can be SRLIW.
1268 if (C2 + 32 == Leading) {
1269 SDNode *SRLIW = CurDAG->getMachineNode(
1270 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1271 ReplaceNode(Node, SRLIW);
1272 return;
1273 }
1274
1275 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1276 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1277 //
1278 // This pattern occurs when (i32 (srl (sra X, 31), c3 - 32)) is type
1279 // legalized and goes through DAG combine.
1280 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1281 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1282 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1283 SDNode *SRAIW =
1284 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1285 CurDAG->getTargetConstant(31, DL, VT));
1286 SDNode *SRLIW = CurDAG->getMachineNode(
1287 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1288 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1289 ReplaceNode(Node, SRLIW);
1290 return;
1291 }
1292
1293 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1294 // available.
1295 // Transform (and (srl x, C2), C1)
1296 // -> (<bfextract> x, msb, lsb)
1297 //
1298 // Make sure to keep this below the SRLIW cases, as we always want to
1299 // prefer the more common instruction.
1300 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1301 const unsigned Lsb = C2;
1302 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1303 return;
1304
1305 // (srli (slli x, c3-c2), c3).
1306 // Skip if we could use (zext.w (sraiw X, C2)).
1307 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1308 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1309 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1310 // Also Skip if we can use bexti or th.tst.
1311 Skip |= HasBitTest && Leading == XLen - 1;
1312 if (OneUseOrZExtW && !Skip) {
1313 SDNode *SLLI = CurDAG->getMachineNode(
1314 RISCV::SLLI, DL, VT, X,
1315 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1316 SDNode *SRLI = CurDAG->getMachineNode(
1317 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1318 CurDAG->getTargetConstant(Leading, DL, VT));
1319 ReplaceNode(Node, SRLI);
1320 return;
1321 }
1322 }
1323 }
1324
1325 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1326 // shifted by c2 bits with c3 leading zeros.
1327 if (LeftShift && isShiftedMask_64(C1)) {
1328 unsigned Leading = XLen - llvm::bit_width(C1);
1329
1330 if (C2 + Leading < XLen &&
1331 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1332 // Use slli.uw when possible.
1333 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1334 SDNode *SLLI_UW =
1335 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1336 CurDAG->getTargetConstant(C2, DL, VT));
1337 ReplaceNode(Node, SLLI_UW);
1338 return;
1339 }
1340
1341 // (srli (slli c2+c3), c3)
1342 if (OneUseOrZExtW && !IsCANDI) {
1343 SDNode *SLLI = CurDAG->getMachineNode(
1344 RISCV::SLLI, DL, VT, X,
1345 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1346 SDNode *SRLI = CurDAG->getMachineNode(
1347 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1348 CurDAG->getTargetConstant(Leading, DL, VT));
1349 ReplaceNode(Node, SRLI);
1350 return;
1351 }
1352 }
1353 }
1354
1355 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1356 // shifted mask with c2 leading zeros and c3 trailing zeros.
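// For example, (and (srl X, 40), 0xFFFF00) has 40 leading and 8 trailing
// zeros in the mask, so it becomes (slli (srli X, 48), 8).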
1357 if (!LeftShift && isShiftedMask_64(C1)) {
1358 unsigned Leading = XLen - llvm::bit_width(C1);
1359 unsigned Trailing = llvm::countr_zero(C1);
1360 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1361 !IsCANDI) {
1362 unsigned SrliOpc = RISCV::SRLI;
1363 // If the input is zexti32 we should use SRLIW.
1364 if (X.getOpcode() == ISD::AND &&
1365 isa<ConstantSDNode>(X.getOperand(1)) &&
1366 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1367 SrliOpc = RISCV::SRLIW;
1368 X = X.getOperand(0);
1369 }
1370 SDNode *SRLI = CurDAG->getMachineNode(
1371 SrliOpc, DL, VT, X,
1372 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1373 SDNode *SLLI = CurDAG->getMachineNode(
1374 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1375 CurDAG->getTargetConstant(Trailing, DL, VT));
1376 ReplaceNode(Node, SLLI);
1377 return;
1378 }
1379 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1380 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1381 OneUseOrZExtW && !IsCANDI) {
1382 SDNode *SRLIW = CurDAG->getMachineNode(
1383 RISCV::SRLIW, DL, VT, X,
1384 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1385 SDNode *SLLI = CurDAG->getMachineNode(
1386 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1387 CurDAG->getTargetConstant(Trailing, DL, VT));
1388 ReplaceNode(Node, SLLI);
1389 return;
1390 }
1391 }
1392
1393 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1394 // shifted mask with no leading zeros and c3 trailing zeros.
1395 if (LeftShift && isShiftedMask_64(C1)) {
1396 unsigned Leading = XLen - llvm::bit_width(C1);
1397 unsigned Trailing = llvm::countr_zero(C1);
1398 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1399 SDNode *SRLI = CurDAG->getMachineNode(
1400 RISCV::SRLI, DL, VT, X,
1401 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1402 SDNode *SLLI = CurDAG->getMachineNode(
1403 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1404 CurDAG->getTargetConstant(Trailing, DL, VT));
1405 ReplaceNode(Node, SLLI);
1406 return;
1407 }
1408 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1409 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1410 SDNode *SRLIW = CurDAG->getMachineNode(
1411 RISCV::SRLIW, DL, VT, X,
1412 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1413 SDNode *SLLI = CurDAG->getMachineNode(
1414 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1415 CurDAG->getTargetConstant(Trailing, DL, VT));
1416 ReplaceNode(Node, SLLI);
1417 return;
1418 }
1419 }
1420 }
1421
1422 // If C1 masks off the upper bits only (but can't be formed as an
1423 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1424 // available.
1425 // Transform (and x, C1)
1426 // -> (<bfextract> x, msb, lsb)
1427 if (isC1Mask && !isC1ANDI) {
1428 const unsigned Msb = llvm::bit_width(C1) - 1;
1429 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1430 return;
1431 }
1432
1433 if (tryShrinkShlLogicImm(Node))
1434 return;
1435
1436 break;
1437 }
1438 case ISD::MUL: {
1439 // Special case for calculating (mul (and X, C2), C1) where the full product
1440 // fits in XLen bits. We can shift X left by the number of leading zeros in
1441 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1442 // product has XLen trailing zeros, putting it in the output of MULHU. This
1443 // can avoid materializing a constant in a register for C2.
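// For example, (mul (and X, 0xFFFFFF), 100) can become
// (mulhu (slli X, 40), 100 << 24): the 128-bit product of the two shifted
// operands is exactly (X & 0xFFFFFF) * 100 * 2^64, so MULHU yields the
// desired value and the 0xFFFFFF mask never has to be materialized.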
1444
1445 // RHS should be a constant.
1446 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1447 if (!N1C || !N1C->hasOneUse())
1448 break;
1449
1450 // LHS should be an AND with constant.
1451 SDValue N0 = Node->getOperand(0);
1452 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1453 break;
1454
1455 uint64_t C2 = N0.getConstantOperandVal(1);
1456
1457 // Constant should be a mask.
1458 if (!isMask_64(C2))
1459 break;
1460
1461 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1462 // multiple users or the constant is a simm12. This prevents inserting a
1463 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1464 // make it more costly to materialize. Otherwise, using a SLLI might allow
1465 // it to be compressed.
1466 bool IsANDIOrZExt =
1467 isInt<12>(C2) ||
1468 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1469 // With XTHeadBb, we can use TH.EXTU.
1470 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1471 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1472 break;
1473 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1474 // the constant is a simm32.
1475 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1476 // With XTHeadBb, we can use TH.EXTU.
1477 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1478 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1479 break;
1480
1481 // We need to shift left the AND input and C1 by a total of XLen bits.
1482
1483 // How far left do we need to shift the AND input?
1484 unsigned XLen = Subtarget->getXLen();
1485 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1486
1487 // The constant gets shifted by the remaining amount unless that would
1488 // shift bits out.
1489 uint64_t C1 = N1C->getZExtValue();
1490 unsigned ConstantShift = XLen - LeadingZeros;
1491 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1492 break;
1493
1494 uint64_t ShiftedC1 = C1 << ConstantShift;
1495 // If this RV32, we need to sign extend the constant.
1496 if (XLen == 32)
1497 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1498
1499 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1500 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1501 SDNode *SLLI =
1502 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1503 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1504 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1505 SDValue(SLLI, 0), SDValue(Imm, 0));
1506 ReplaceNode(Node, MULHU);
1507 return;
1508 }
1509 case ISD::LOAD: {
1510 if (tryIndexedLoad(Node))
1511 return;
1512 break;
1513 }
1514 case ISD::INTRINSIC_WO_CHAIN: {
1515 unsigned IntNo = Node->getConstantOperandVal(0);
1516 switch (IntNo) {
1517 // By default we do not custom select any intrinsic.
1518 default:
1519 break;
1520 case Intrinsic::riscv_vmsgeu:
1521 case Intrinsic::riscv_vmsge: {
1522 SDValue Src1 = Node->getOperand(1);
1523 SDValue Src2 = Node->getOperand(2);
1524 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1525 bool IsCmpUnsignedZero = false;
1526 // Only custom select scalar second operand.
1527 if (Src2.getValueType() != XLenVT)
1528 break;
1529 // Small constants are handled with patterns.
1530 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1531 int64_t CVal = C->getSExtValue();
1532 if (CVal >= -15 && CVal <= 16) {
1533 if (!IsUnsigned || CVal != 0)
1534 break;
1535 IsCmpUnsignedZero = true;
1536 }
1537 }
1538 MVT Src1VT = Src1.getSimpleValueType();
1539 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1540 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1541 default:
1542 llvm_unreachable("Unexpected LMUL!");
1543#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1544 case RISCVII::VLMUL::lmulenum: \
1545 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1546 : RISCV::PseudoVMSLT_VX_##suffix; \
1547 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1548 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1549 break;
1550 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1551 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1552 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1553 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1554 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1555 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1556 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1557#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1558 }
1559 SDValue SEW = CurDAG->getTargetConstant(
1560 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1561 SDValue VL;
1562 selectVLOp(Node->getOperand(3), VL);
1563
1564 // If vmsgeu with 0 immediate, expand it to vmset.
1565 if (IsCmpUnsignedZero) {
1566 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1567 return;
1568 }
1569
1570 // Expand to
1571 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1572 SDValue Cmp = SDValue(
1573 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1574 0);
1575 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1576 {Cmp, Cmp, VL, SEW}));
1577 return;
1578 }
1579 case Intrinsic::riscv_vmsgeu_mask:
1580 case Intrinsic::riscv_vmsge_mask: {
1581 SDValue Src1 = Node->getOperand(2);
1582 SDValue Src2 = Node->getOperand(3);
1583 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1584 bool IsCmpUnsignedZero = false;
1585 // Only custom select scalar second operand.
1586 if (Src2.getValueType() != XLenVT)
1587 break;
1588 // Small constants are handled with patterns.
1589 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1590 int64_t CVal = C->getSExtValue();
1591 if (CVal >= -15 && CVal <= 16) {
1592 if (!IsUnsigned || CVal != 0)
1593 break;
1594 IsCmpUnsignedZero = true;
1595 }
1596 }
1597 MVT Src1VT = Src1.getSimpleValueType();
1598 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1599 VMOROpcode;
1600 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1601 default:
1602 llvm_unreachable("Unexpected LMUL!");
1603#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1604 case RISCVII::VLMUL::lmulenum: \
1605 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1606 : RISCV::PseudoVMSLT_VX_##suffix; \
1607 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1608 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1609 break;
1610 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1611 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1612 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1613 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1614 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1615 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1616 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1617#undef CASE_VMSLT_OPCODES
1618 }
1619 // Mask operations use the LMUL from the mask type.
1620 switch (RISCVTargetLowering::getLMUL(VT)) {
1621 default:
1622 llvm_unreachable("Unexpected LMUL!");
1623#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1624 case RISCVII::VLMUL::lmulenum: \
1625 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1626 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1627 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1628 break;
1629 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1630 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1631 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1632 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1633 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1634 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1635 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1636 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1637 }
1638 SDValue SEW = CurDAG->getTargetConstant(
1639 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1640 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1641 SDValue VL;
1642 selectVLOp(Node->getOperand(5), VL);
1643 SDValue MaskedOff = Node->getOperand(1);
1644 SDValue Mask = Node->getOperand(4);
1645
1646 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1647 if (IsCmpUnsignedZero) {
1648 // We don't need vmor if the MaskedOff and the Mask are the same
1649 // value.
1650 if (Mask == MaskedOff) {
1651 ReplaceUses(Node, Mask.getNode());
1652 return;
1653 }
1654 ReplaceNode(Node,
1655 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1656 {Mask, MaskedOff, VL, MaskSEW}));
1657 return;
1658 }
1659
1660 // If the MaskedOff value and the Mask are the same value use
1661 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1662 // This avoids needing to copy v0 to vd before starting the next sequence.
1663 if (Mask == MaskedOff) {
1664 SDValue Cmp = SDValue(
1665 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1666 0);
1667 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1668 {Mask, Cmp, VL, MaskSEW}));
1669 return;
1670 }
1671
1672 // Mask needs to be copied to V0.
1673 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1674 RISCV::V0, Mask, SDValue());
1675 SDValue Glue = Chain.getValue(1);
1676 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1677
1678 // Otherwise use
1679 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1680 // The result is mask undisturbed.
1681 // We use the same instructions to emulate mask agnostic behavior, because
1682 // the agnostic result can be either undisturbed or all 1.
1683 SDValue Cmp = SDValue(
1684 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1685 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1686 0);
1687 // vmxor.mm vd, vd, v0 is used to update active value.
1688 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1689 {Cmp, Mask, VL, MaskSEW}));
1690 return;
1691 }
1692 case Intrinsic::riscv_vsetvli:
1693 case Intrinsic::riscv_vsetvlimax:
1694 return selectVSETVLI(Node);
1695 }
1696 break;
1697 }
1698 case ISD::INTRINSIC_W_CHAIN: {
1699 unsigned IntNo = Node->getConstantOperandVal(1);
1700 switch (IntNo) {
1701 // By default we do not custom select any intrinsic.
1702 default:
1703 break;
1704 case Intrinsic::riscv_vlseg2:
1705 case Intrinsic::riscv_vlseg3:
1706 case Intrinsic::riscv_vlseg4:
1707 case Intrinsic::riscv_vlseg5:
1708 case Intrinsic::riscv_vlseg6:
1709 case Intrinsic::riscv_vlseg7:
1710 case Intrinsic::riscv_vlseg8: {
1711 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1712 return;
1713 }
1714 case Intrinsic::riscv_vlseg2_mask:
1715 case Intrinsic::riscv_vlseg3_mask:
1716 case Intrinsic::riscv_vlseg4_mask:
1717 case Intrinsic::riscv_vlseg5_mask:
1718 case Intrinsic::riscv_vlseg6_mask:
1719 case Intrinsic::riscv_vlseg7_mask:
1720 case Intrinsic::riscv_vlseg8_mask: {
1721 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1722 return;
1723 }
1724 case Intrinsic::riscv_vlsseg2:
1725 case Intrinsic::riscv_vlsseg3:
1726 case Intrinsic::riscv_vlsseg4:
1727 case Intrinsic::riscv_vlsseg5:
1728 case Intrinsic::riscv_vlsseg6:
1729 case Intrinsic::riscv_vlsseg7:
1730 case Intrinsic::riscv_vlsseg8: {
1731 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1732 return;
1733 }
1734 case Intrinsic::riscv_vlsseg2_mask:
1735 case Intrinsic::riscv_vlsseg3_mask:
1736 case Intrinsic::riscv_vlsseg4_mask:
1737 case Intrinsic::riscv_vlsseg5_mask:
1738 case Intrinsic::riscv_vlsseg6_mask:
1739 case Intrinsic::riscv_vlsseg7_mask:
1740 case Intrinsic::riscv_vlsseg8_mask: {
1741 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1742 return;
1743 }
1744 case Intrinsic::riscv_vloxseg2:
1745 case Intrinsic::riscv_vloxseg3:
1746 case Intrinsic::riscv_vloxseg4:
1747 case Intrinsic::riscv_vloxseg5:
1748 case Intrinsic::riscv_vloxseg6:
1749 case Intrinsic::riscv_vloxseg7:
1750 case Intrinsic::riscv_vloxseg8:
1751 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1752 return;
1753 case Intrinsic::riscv_vluxseg2:
1754 case Intrinsic::riscv_vluxseg3:
1755 case Intrinsic::riscv_vluxseg4:
1756 case Intrinsic::riscv_vluxseg5:
1757 case Intrinsic::riscv_vluxseg6:
1758 case Intrinsic::riscv_vluxseg7:
1759 case Intrinsic::riscv_vluxseg8:
1760 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1761 return;
1762 case Intrinsic::riscv_vloxseg2_mask:
1763 case Intrinsic::riscv_vloxseg3_mask:
1764 case Intrinsic::riscv_vloxseg4_mask:
1765 case Intrinsic::riscv_vloxseg5_mask:
1766 case Intrinsic::riscv_vloxseg6_mask:
1767 case Intrinsic::riscv_vloxseg7_mask:
1768 case Intrinsic::riscv_vloxseg8_mask:
1769 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1770 return;
1771 case Intrinsic::riscv_vluxseg2_mask:
1772 case Intrinsic::riscv_vluxseg3_mask:
1773 case Intrinsic::riscv_vluxseg4_mask:
1774 case Intrinsic::riscv_vluxseg5_mask:
1775 case Intrinsic::riscv_vluxseg6_mask:
1776 case Intrinsic::riscv_vluxseg7_mask:
1777 case Intrinsic::riscv_vluxseg8_mask:
1778 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1779 return;
1780 case Intrinsic::riscv_vlseg8ff:
1781 case Intrinsic::riscv_vlseg7ff:
1782 case Intrinsic::riscv_vlseg6ff:
1783 case Intrinsic::riscv_vlseg5ff:
1784 case Intrinsic::riscv_vlseg4ff:
1785 case Intrinsic::riscv_vlseg3ff:
1786 case Intrinsic::riscv_vlseg2ff: {
1787 selectVLSEGFF(Node, /*IsMasked*/ false);
1788 return;
1789 }
1790 case Intrinsic::riscv_vlseg8ff_mask:
1791 case Intrinsic::riscv_vlseg7ff_mask:
1792 case Intrinsic::riscv_vlseg6ff_mask:
1793 case Intrinsic::riscv_vlseg5ff_mask:
1794 case Intrinsic::riscv_vlseg4ff_mask:
1795 case Intrinsic::riscv_vlseg3ff_mask:
1796 case Intrinsic::riscv_vlseg2ff_mask: {
1797 selectVLSEGFF(Node, /*IsMasked*/ true);
1798 return;
1799 }
1800 case Intrinsic::riscv_vloxei:
1801 case Intrinsic::riscv_vloxei_mask:
1802 case Intrinsic::riscv_vluxei:
1803 case Intrinsic::riscv_vluxei_mask: {
1804 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1805 IntNo == Intrinsic::riscv_vluxei_mask;
1806 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1807 IntNo == Intrinsic::riscv_vloxei_mask;
1808
1809 MVT VT = Node->getSimpleValueType(0);
1810 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1811
1812 unsigned CurOp = 2;
1813 SmallVector<SDValue, 8> Operands;
1814 Operands.push_back(Node->getOperand(CurOp++));
1815
1816 MVT IndexVT;
1817 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1818 /*IsStridedOrIndexed*/ true, Operands,
1819 /*IsLoad=*/true, &IndexVT);
1820
1821 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1822 "Element count mismatch");
1823
1824 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1825 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1826 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1827 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1828 report_fatal_error("The V extension does not support EEW=64 for index "
1829 "values when XLEN=32");
1830 }
1831 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1832 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1833 static_cast<unsigned>(IndexLMUL));
1834 MachineSDNode *Load =
1835 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1836
1837 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1838 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1839
1840 ReplaceNode(Node, Load);
1841 return;
1842 }
1843 case Intrinsic::riscv_vlm:
1844 case Intrinsic::riscv_vle:
1845 case Intrinsic::riscv_vle_mask:
1846 case Intrinsic::riscv_vlse:
1847 case Intrinsic::riscv_vlse_mask: {
1848 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1849 IntNo == Intrinsic::riscv_vlse_mask;
1850 bool IsStrided =
1851 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1852
1853 MVT VT = Node->getSimpleValueType(0);
1854 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1855
1856 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1857 // operand at the IR level. In pseudos, it has both a policy and a
1858 // passthru operand. The passthru operand is needed to track the
1859 // "tail undefined" state, and the policy is there just for
1860 // consistency - it will always be "don't care" for the
1861 // unmasked form.
1862 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1863 unsigned CurOp = 2;
1864 SmallVector<SDValue, 8> Operands;
1865 if (HasPassthruOperand)
1866 Operands.push_back(Node->getOperand(CurOp++));
1867 else {
1868 // We eagerly lower to implicit_def (instead of undef), as we
1869 // otherwise fail to select nodes such as: nxv1i1 = undef
1870 SDNode *Passthru =
1871 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1872 Operands.push_back(SDValue(Passthru, 0));
1873 }
1874 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1875 Operands, /*IsLoad=*/true);
1876
1877 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1878 const RISCV::VLEPseudo *P =
1879 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1880 static_cast<unsigned>(LMUL));
1881 MachineSDNode *Load =
1882 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1883
1884 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1885 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1886
1887 ReplaceNode(Node, Load);
1888 return;
1889 }
1890 case Intrinsic::riscv_vleff:
1891 case Intrinsic::riscv_vleff_mask: {
1892 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1893
1894 MVT VT = Node->getSimpleValueType(0);
1895 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1896
1897 unsigned CurOp = 2;
1898 SmallVector<SDValue, 7> Operands;
1899 Operands.push_back(Node->getOperand(CurOp++));
1900 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1901 /*IsStridedOrIndexed*/ false, Operands,
1902 /*IsLoad=*/true);
1903
1904 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1905 const RISCV::VLEPseudo *P =
1906 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1907 Log2SEW, static_cast<unsigned>(LMUL));
1908 MachineSDNode *Load = CurDAG->getMachineNode(
1909 P->Pseudo, DL, Node->getVTList(), Operands);
1910 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1911 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1912
1913 ReplaceNode(Node, Load);
1914 return;
1915 }
1916 }
1917 break;
1918 }
1919 case ISD::INTRINSIC_VOID: {
1920 unsigned IntNo = Node->getConstantOperandVal(1);
1921 switch (IntNo) {
1922 case Intrinsic::riscv_vsseg2:
1923 case Intrinsic::riscv_vsseg3:
1924 case Intrinsic::riscv_vsseg4:
1925 case Intrinsic::riscv_vsseg5:
1926 case Intrinsic::riscv_vsseg6:
1927 case Intrinsic::riscv_vsseg7:
1928 case Intrinsic::riscv_vsseg8: {
1929 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1930 return;
1931 }
1932 case Intrinsic::riscv_vsseg2_mask:
1933 case Intrinsic::riscv_vsseg3_mask:
1934 case Intrinsic::riscv_vsseg4_mask:
1935 case Intrinsic::riscv_vsseg5_mask:
1936 case Intrinsic::riscv_vsseg6_mask:
1937 case Intrinsic::riscv_vsseg7_mask:
1938 case Intrinsic::riscv_vsseg8_mask: {
1939 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1940 return;
1941 }
1942 case Intrinsic::riscv_vssseg2:
1943 case Intrinsic::riscv_vssseg3:
1944 case Intrinsic::riscv_vssseg4:
1945 case Intrinsic::riscv_vssseg5:
1946 case Intrinsic::riscv_vssseg6:
1947 case Intrinsic::riscv_vssseg7:
1948 case Intrinsic::riscv_vssseg8: {
1949 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1950 return;
1951 }
1952 case Intrinsic::riscv_vssseg2_mask:
1953 case Intrinsic::riscv_vssseg3_mask:
1954 case Intrinsic::riscv_vssseg4_mask:
1955 case Intrinsic::riscv_vssseg5_mask:
1956 case Intrinsic::riscv_vssseg6_mask:
1957 case Intrinsic::riscv_vssseg7_mask:
1958 case Intrinsic::riscv_vssseg8_mask: {
1959 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1960 return;
1961 }
1962 case Intrinsic::riscv_vsoxseg2:
1963 case Intrinsic::riscv_vsoxseg3:
1964 case Intrinsic::riscv_vsoxseg4:
1965 case Intrinsic::riscv_vsoxseg5:
1966 case Intrinsic::riscv_vsoxseg6:
1967 case Intrinsic::riscv_vsoxseg7:
1968 case Intrinsic::riscv_vsoxseg8:
1969 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1970 return;
1971 case Intrinsic::riscv_vsuxseg2:
1972 case Intrinsic::riscv_vsuxseg3:
1973 case Intrinsic::riscv_vsuxseg4:
1974 case Intrinsic::riscv_vsuxseg5:
1975 case Intrinsic::riscv_vsuxseg6:
1976 case Intrinsic::riscv_vsuxseg7:
1977 case Intrinsic::riscv_vsuxseg8:
1978 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1979 return;
1980 case Intrinsic::riscv_vsoxseg2_mask:
1981 case Intrinsic::riscv_vsoxseg3_mask:
1982 case Intrinsic::riscv_vsoxseg4_mask:
1983 case Intrinsic::riscv_vsoxseg5_mask:
1984 case Intrinsic::riscv_vsoxseg6_mask:
1985 case Intrinsic::riscv_vsoxseg7_mask:
1986 case Intrinsic::riscv_vsoxseg8_mask:
1987 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1988 return;
1989 case Intrinsic::riscv_vsuxseg2_mask:
1990 case Intrinsic::riscv_vsuxseg3_mask:
1991 case Intrinsic::riscv_vsuxseg4_mask:
1992 case Intrinsic::riscv_vsuxseg5_mask:
1993 case Intrinsic::riscv_vsuxseg6_mask:
1994 case Intrinsic::riscv_vsuxseg7_mask:
1995 case Intrinsic::riscv_vsuxseg8_mask:
1996 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1997 return;
1998 case Intrinsic::riscv_vsoxei:
1999 case Intrinsic::riscv_vsoxei_mask:
2000 case Intrinsic::riscv_vsuxei:
2001 case Intrinsic::riscv_vsuxei_mask: {
2002 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2003 IntNo == Intrinsic::riscv_vsuxei_mask;
2004 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2005 IntNo == Intrinsic::riscv_vsoxei_mask;
2006
2007 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2008 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2009
2010 unsigned CurOp = 2;
2011 SmallVector<SDValue, 8> Operands;
2012 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2013
2014 MVT IndexVT;
2015 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2016 /*IsStridedOrIndexed*/ true, Operands,
2017 /*IsLoad=*/false, &IndexVT);
2018
2019 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2020 "Element count mismatch");
2021
2022 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2023 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2024 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2025 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2026 report_fatal_error("The V extension does not support EEW=64 for index "
2027 "values when XLEN=32");
2028 }
2029 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2030 IsMasked, IsOrdered, IndexLog2EEW,
2031 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2032 MachineSDNode *Store =
2033 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2034
2035 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2036 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2037
2038 ReplaceNode(Node, Store);
2039 return;
2040 }
2041 case Intrinsic::riscv_vsm:
2042 case Intrinsic::riscv_vse:
2043 case Intrinsic::riscv_vse_mask:
2044 case Intrinsic::riscv_vsse:
2045 case Intrinsic::riscv_vsse_mask: {
2046 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2047 IntNo == Intrinsic::riscv_vsse_mask;
2048 bool IsStrided =
2049 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2050
2051 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2052 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2053
2054 unsigned CurOp = 2;
2055 SmallVector<SDValue, 8> Operands;
2056 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2057
2058 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2059 Operands);
2060
2061 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2062 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2063 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2064 MachineSDNode *Store =
2065 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2066 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2067 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2068
2069 ReplaceNode(Node, Store);
2070 return;
2071 }
2072 case Intrinsic::riscv_sf_vc_x_se:
2073 case Intrinsic::riscv_sf_vc_i_se:
2074 selectSF_VC_X_SE(Node);
2075 return;
2076 }
2077 break;
2078 }
2079 case ISD::BITCAST: {
2080 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2081 // Just drop bitcasts between vectors if both are fixed or both are
2082 // scalable.
2083 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2084 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2085 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2086 CurDAG->RemoveDeadNode(Node);
2087 return;
2088 }
2089 break;
2090 }
2091 case ISD::INSERT_SUBVECTOR: {
2092 SDValue V = Node->getOperand(0);
2093 SDValue SubV = Node->getOperand(1);
2094 SDLoc DL(SubV);
2095 auto Idx = Node->getConstantOperandVal(2);
2096 MVT SubVecVT = SubV.getSimpleValueType();
2097
2098 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2099 MVT SubVecContainerVT = SubVecVT;
2100 // Establish the correct scalable-vector types for any fixed-length type.
2101 if (SubVecVT.isFixedLengthVector()) {
2102 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2103 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2104 [[maybe_unused]] bool ExactlyVecRegSized =
2105 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2106 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2107 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2108 .getKnownMinValue()));
2109 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2110 }
2111 MVT ContainerVT = VT;
2112 if (VT.isFixedLengthVector())
2113 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2114
2115 const auto *TRI = Subtarget->getRegisterInfo();
2116 unsigned SubRegIdx;
2117 std::tie(SubRegIdx, Idx) =
2118 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2119 ContainerVT, SubVecContainerVT, Idx, TRI);
2120
2121 // If the Idx hasn't been completely eliminated then this is a subvector
2122 // insert which doesn't naturally align to a vector register. These must
2123 // be handled using instructions to manipulate the vector registers.
2124 if (Idx != 0)
2125 break;
2126
2127 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2128 [[maybe_unused]] bool IsSubVecPartReg =
2129 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2130 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2131 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2132 assert((!IsSubVecPartReg || V.isUndef()) &&
2133 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2134 "the subvector is smaller than a full-sized register");
2135
2136 // If we haven't set a SubRegIdx, then we must be going between
2137 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2138 if (SubRegIdx == RISCV::NoSubRegister) {
2139 unsigned InRegClassID =
2140 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2141 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2142 InRegClassID &&
2143 "Unexpected subvector extraction");
2144 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2145 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2146 DL, VT, SubV, RC);
2147 ReplaceNode(Node, NewNode);
2148 return;
2149 }
2150
2151 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2152 ReplaceNode(Node, Insert.getNode());
2153 return;
2154 }
2155 case ISD::EXTRACT_SUBVECTOR: {
2156 SDValue V = Node->getOperand(0);
2157 auto Idx = Node->getConstantOperandVal(1);
2158 MVT InVT = V.getSimpleValueType();
2159 SDLoc DL(V);
2160
2161 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2162 MVT SubVecContainerVT = VT;
2163 // Establish the correct scalable-vector types for any fixed-length type.
2164 if (VT.isFixedLengthVector()) {
2165 assert(Idx == 0);
2166 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2167 }
2168 if (InVT.isFixedLengthVector())
2169 InVT = TLI.getContainerForFixedLengthVector(InVT);
2170
2171 const auto *TRI = Subtarget->getRegisterInfo();
2172 unsigned SubRegIdx;
2173 std::tie(SubRegIdx, Idx) =
2174 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2175 InVT, SubVecContainerVT, Idx, TRI);
2176
2177 // If the Idx hasn't been completely eliminated then this is a subvector
2178 // extract which doesn't naturally align to a vector register. These must
2179 // be handled using instructions to manipulate the vector registers.
2180 if (Idx != 0)
2181 break;
2182
2183 // If we haven't set a SubRegIdx, then we must be going between
2184 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2185 if (SubRegIdx == RISCV::NoSubRegister) {
2186 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2187 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2188 InRegClassID &&
2189 "Unexpected subvector extraction");
2190 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2191 SDNode *NewNode =
2192 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2193 ReplaceNode(Node, NewNode);
2194 return;
2195 }
2196
2197 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2198 ReplaceNode(Node, Extract.getNode());
2199 return;
2200 }
2201 case RISCVISD::VMV_S_X_VL:
2202 case RISCVISD::VFMV_S_F_VL:
2203 case RISCVISD::VMV_V_X_VL:
2204 case RISCVISD::VFMV_V_F_VL: {
2205 // Try to match splat of a scalar load to a strided load with stride of x0.
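// For example, a VMV_V_X_VL whose passthru is undef and whose scalar operand
// is a load from p can be selected as a zero-stride VLSE from p (stride
// register x0), removing the scalar load; when VL is 1 a plain VLE is used
// instead of a strided load.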
2206 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2207 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2208 if (!Node->getOperand(0).isUndef())
2209 break;
2210 SDValue Src = Node->getOperand(1);
2211 auto *Ld = dyn_cast<LoadSDNode>(Src);
2212 // We can't fold a load with a pre/post-indexed update because its second
2213 // output (the updated address) is used, so the load node can't be removed.
2214 if (!Ld || Ld->isIndexed())
2215 break;
2216 EVT MemVT = Ld->getMemoryVT();
2217 // The memory VT should be the same size as the element type.
2218 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2219 break;
2220 if (!IsProfitableToFold(Src, Node, Node) ||
2221 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2222 break;
2223
2224 SDValue VL;
2225 if (IsScalarMove) {
2226 // We could deal with more VL if we update the VSETVLI insert pass to
2227 // avoid introducing more VSETVLI.
2228 if (!isOneConstant(Node->getOperand(2)))
2229 break;
2230 selectVLOp(Node->getOperand(2), VL);
2231 } else
2232 selectVLOp(Node->getOperand(2), VL);
2233
2234 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2235 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2236
2237 // If VL=1, then we don't need to do a strided load and can just do a
2238 // regular load.
2239 bool IsStrided = !isOneConstant(VL);
2240
2241 // Only do a strided load if we have optimized zero-stride vector load.
2242 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2243 break;
2244
2245 SmallVector<SDValue> Operands = {
2246 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2247 Ld->getBasePtr()};
2248 if (IsStrided)
2249 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2250 uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2251 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2252 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2253
2254 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2255 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2256 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2257 Log2SEW, static_cast<unsigned>(LMUL));
2258 MachineSDNode *Load =
2259 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2260 // Update the chain.
2261 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2262 // Record the mem-refs
2263 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2264 // Replace the splat with the vlse.
2265 ReplaceNode(Node, Load);
2266 return;
2267 }
2268 case ISD::PREFETCH:
2269 unsigned Locality = Node->getConstantOperandVal(3);
2270 if (Locality > 2)
2271 break;
2272
2273 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2274 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2275 MMO->setFlags(MachineMemOperand::MONonTemporal);
2276
2277 int NontemporalLevel = 0;
2278 switch (Locality) {
2279 case 0:
2280 NontemporalLevel = 3; // NTL.ALL
2281 break;
2282 case 1:
2283 NontemporalLevel = 1; // NTL.PALL
2284 break;
2285 case 2:
2286 NontemporalLevel = 0; // NTL.P1
2287 break;
2288 default:
2289 llvm_unreachable("unexpected locality value.");
2290 }
2291
2292 if (NontemporalLevel & 0b1)
2293 MMO->setFlags(MONontemporalBit0);
2294 if (NontemporalLevel & 0b10)
2295 MMO->setFlags(MONontemporalBit1);
2296 }
2297 break;
2298 }
2299
2300 // Select the default instruction.
2301 SelectCode(Node);
2302}
2303
2304bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2305 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2306 std::vector<SDValue> &OutOps) {
2307 // Always produce a register and immediate operand, as expected by
2308 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2309 switch (ConstraintID) {
2310 case InlineAsm::ConstraintCode::o:
2311 case InlineAsm::ConstraintCode::m: {
2312 SDValue Op0, Op1;
2313 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2314 assert(Found && "SelectAddrRegImm should always succeed");
2315 OutOps.push_back(Op0);
2316 OutOps.push_back(Op1);
2317 return false;
2318 }
2319 case InlineAsm::ConstraintCode::A:
2320 OutOps.push_back(Op);
2321 OutOps.push_back(
2322 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2323 return false;
2324 default:
2325 report_fatal_error("Unexpected asm memory constraint " +
2326 InlineAsm::getMemConstraintName(ConstraintID));
2327 }
2328
2329 return true;
2330}
2331
2332bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2333 SDValue &Offset) {
2334 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2335 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2336 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2337 return true;
2338 }
2339
2340 return false;
2341}
2342
2343// Select a frame index and an optional immediate offset from an ADD or OR.
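// For example, (add frameindex:fi, 16) yields Base = fi and Offset = 16,
// provided the constant fits in a simm12.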
2344bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2345 SDValue &Offset) {
2346 if (SelectAddrFrameIndex(Addr, Base, Offset))
2347 return true;
2348
2349 if (!CurDAG->isBaseWithConstantOffset(Addr))
2350 return false;
2351
2352 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2353 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2354 if (isInt<12>(CVal)) {
2355 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2356 Subtarget->getXLenVT());
2357 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2358 Subtarget->getXLenVT());
2359 return true;
2360 }
2361 }
2362
2363 return false;
2364}
2365
2366// Fold constant addresses.
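// For example, a constant address of 0x12345678 splits into Lo12 = 0x678 and
// Hi = 0x12345000, so the base becomes LUI 0x12345 and 0x678 is used as the
// load/store immediate offset.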
2367static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2368 const MVT VT, const RISCVSubtarget *Subtarget,
2369 SDValue Addr, SDValue &Base, SDValue &Offset,
2370 bool IsPrefetch = false) {
2371 if (!isa<ConstantSDNode>(Addr))
2372 return false;
2373
2374 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2375
2376 // If the constant is a simm12, we can fold the whole constant and use X0 as
2377 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2378 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
2379 int64_t Lo12 = SignExtend64<12>(CVal);
2380 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2381 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2382 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2383 return false;
2384
2385 if (Hi) {
2386 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2387 Base = SDValue(
2388 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2389 CurDAG->getTargetConstant(Hi20, DL, VT)),
2390 0);
2391 } else {
2392 Base = CurDAG->getRegister(RISCV::X0, VT);
2393 }
2394 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2395 return true;
2396 }
2397
2398 // Ask how constant materialization would handle this constant.
2399 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2400
2401 // If the last instruction would be an ADDI, we can fold its immediate and
2402 // emit the rest of the sequence as the base.
2403 if (Seq.back().getOpcode() != RISCV::ADDI)
2404 return false;
2405 Lo12 = Seq.back().getImm();
2406 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2407 return false;
2408
2409 // Drop the last instruction.
2410 Seq.pop_back();
2411 assert(!Seq.empty() && "Expected more instructions in sequence");
2412
2413 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2414 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2415 return true;
2416}
2417
2418// Is this ADD instruction only used as the base pointer of scalar loads and
2419// stores?
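// For example, if (add X, C) with a large C feeds only loads and stores, the
// low 12 bits of C can be folded into their offsets. If the sum is also
// stored as data or used in other arithmetic, the full constant has to be
// materialized anyway and the fold saves nothing.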
2420static bool isWorthFoldingAdd(SDValue Add) {
2421 for (auto *Use : Add->uses()) {
2422 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2423 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2424 Use->getOpcode() != ISD::ATOMIC_STORE)
2425 return false;
2426 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2427 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2428 VT != MVT::f64)
2429 return false;
2430 // Don't allow stores of the value. It must be used as the address.
2431 if (Use->getOpcode() == ISD::STORE &&
2432 cast<StoreSDNode>(Use)->getValue() == Add)
2433 return false;
2434 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2435 cast<AtomicSDNode>(Use)->getVal() == Add)
2436 return false;
2437 }
2438
2439 return true;
2440}
2441
2442bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2443 unsigned MaxShiftAmount,
2444 SDValue &Base, SDValue &Index,
2445 SDValue &Scale) {
2446 EVT VT = Addr.getSimpleValueType();
2447 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2448 SDValue &Shift) {
2449 uint64_t ShiftAmt = 0;
2450 Index = N;
2451
2452 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2453 // Only match shifts by a value in range [0, MaxShiftAmount].
2454 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2455 Index = N.getOperand(0);
2456 ShiftAmt = N.getConstantOperandVal(1);
2457 }
2458 }
2459
2460 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2461 return ShiftAmt != 0;
2462 };
2463
2464 if (Addr.getOpcode() == ISD::ADD) {
2465 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2466 SDValue AddrB = Addr.getOperand(0);
2467 if (AddrB.getOpcode() == ISD::ADD &&
2468 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2469 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2470 isInt<12>(C1->getSExtValue())) {
2471 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2472 SDValue C1Val =
2473 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2474 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2475 AddrB.getOperand(1), C1Val),
2476 0);
2477 return true;
2478 }
2479 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2480 Base = Addr.getOperand(1);
2481 return true;
2482 } else {
2483 UnwrapShl(Addr.getOperand(1), Index, Scale);
2484 Base = Addr.getOperand(0);
2485 return true;
2486 }
2487 } else if (UnwrapShl(Addr, Index, Scale)) {
2488 EVT VT = Addr.getValueType();
2489 Base = CurDAG->getRegister(RISCV::X0, VT);
2490 return true;
2491 }
2492
2493 return false;
2494}
2495
2496bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2497 SDValue &Offset, bool IsINX) {
2498 if (SelectAddrFrameIndex(Addr, Base, Offset))
2499 return true;
2500
2501 SDLoc DL(Addr);
2502 MVT VT = Addr.getSimpleValueType();
2503
2504 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2505 Base = Addr.getOperand(0);
2506 Offset = Addr.getOperand(1);
2507 return true;
2508 }
2509
2510 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2511 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2512 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2513 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2514 Base = Addr.getOperand(0);
2515 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2516 SDValue LoOperand = Base.getOperand(1);
2517 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2518 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2519 // (its low part, really), then we can rely on the alignment of that
2520 // variable to provide a margin of safety before low part can overflow
2521 // the 12 bits of the load/store offset. Check if CVal falls within
2522 // that margin; if so (low part + CVal) can't overflow.
2523 const DataLayout &DL = CurDAG->getDataLayout();
2524 Align Alignment = commonAlignment(
2525 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2526 if (CVal == 0 || Alignment > CVal) {
2527 int64_t CombinedOffset = CVal + GA->getOffset();
2528 Base = Base.getOperand(0);
2529 Offset = CurDAG->getTargetGlobalAddress(
2530 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2531 CombinedOffset, GA->getTargetFlags());
2532 return true;
2533 }
2534 }
2535 }
2536
2537 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2538 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2539 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2540 return true;
2541 }
2542 }
2543
2544 // Handle ADD with large immediates.
2545 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2546 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2547 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2548 "simm12 not already handled?");
2549
2550 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2551 // an ADDI for part of the offset and fold the rest into the load/store.
2552 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
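// For example, an offset of 3000 becomes "ADDI base, 2047" with the
// remaining 953 folded into the load/store offset.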
2553 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2554 int64_t Adj = CVal < 0 ? -2048 : 2047;
2555 Base = SDValue(
2556 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2557 CurDAG->getTargetConstant(Adj, DL, VT)),
2558 0);
2559 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2560 return true;
2561 }
2562
2563 // For larger immediates, we might be able to save one instruction from
2564 // constant materialization by folding the Lo12 bits of the immediate into
2565 // the address. We should only do this if the ADD is only used by loads and
2566 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2567 // separately with the full materialized immediate creating extra
2568 // instructions.
2569 if (isWorthFoldingAdd(Addr) &&
2570 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2571 Offset)) {
2572 // Insert an ADD instruction with the materialized Hi52 bits.
2573 Base = SDValue(
2574 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2575 0);
2576 return true;
2577 }
2578 }
2579
2580 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2581 return true;
2582
2583 Base = Addr;
2584 Offset = CurDAG->getTargetConstant(0, DL, VT);
2585 return true;
2586}
2587
2588/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2589 /// Offset should be all zeros.
2590bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2591 SDValue &Offset) {
2592 if (SelectAddrFrameIndex(Addr, Base, Offset))
2593 return true;
2594
2595 SDLoc DL(Addr);
2596 MVT VT = Addr.getSimpleValueType();
2597
2598 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2599 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2600 if (isInt<12>(CVal)) {
2601 Base = Addr.getOperand(0);
2602
2603 // Early-out if not a valid offset.
2604 if ((CVal & 0b11111) != 0) {
2605 Base = Addr;
2606 Offset = CurDAG->getTargetConstant(0, DL, VT);
2607 return true;
2608 }
2609
2610 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2611 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2612 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2613 return true;
2614 }
2615 }
2616
2617 // Handle ADD with large immediates.
2618 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2619 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2620 assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2621 "simm12 not already handled?");
2622
2623 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2624 // one instruction by folding adjustment (-2048 or 2016) into the address.
2625 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2626 int64_t Adj = CVal < 0 ? -2048 : 2016;
2627 int64_t AdjustedOffset = CVal - Adj;
2628 Base = SDValue(CurDAG->getMachineNode(
2629 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2630 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2631 0);
2632 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2633 return true;
2634 }
2635
2636 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2637 Offset, true)) {
2638 // Insert an ADD instruction with the materialized Hi52 bits.
2639 Base = SDValue(
2640 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2641 0);
2642 return true;
2643 }
2644 }
2645
2646 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2647 return true;
2648
2649 Base = Addr;
2650 Offset = CurDAG->getTargetConstant(0, DL, VT);
2651 return true;
2652}
2653
2654bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2655 SDValue &ShAmt) {
2656 ShAmt = N;
2657
2658 // Peek through zext.
2659 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2660 ShAmt = ShAmt.getOperand(0);
2661
2662 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2663 // amount. If there is an AND on the shift amount, we can bypass it if it
2664 // doesn't affect any of those bits.
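// For example, on RV64 (srl a, (and b, 63)) can drop the AND entirely,
// because the shift instruction already reads only the low 6 bits of b.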
2665 if (ShAmt.getOpcode() == ISD::AND &&
2666 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2667 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2668
2669 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2670 // mask that covers the bits needed to represent all shift amounts.
2671 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2672 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2673
2674 if (ShMask.isSubsetOf(AndMask)) {
2675 ShAmt = ShAmt.getOperand(0);
2676 } else {
2677 // SimplifyDemandedBits may have optimized the mask so try restoring any
2678 // bits that are known zero.
2679 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2680 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2681 return true;
2682 ShAmt = ShAmt.getOperand(0);
2683 }
2684 }
2685
2686 if (ShAmt.getOpcode() == ISD::ADD &&
2687 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2688 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2689 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2690 // to avoid the ADD.
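// For example, on RV64 (sll a, (add b, 64)) shifts by the same amount as
// (sll a, b), since only the low 6 bits of the shift amount are read.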
2691 if (Imm != 0 && Imm % ShiftWidth == 0) {
2692 ShAmt = ShAmt.getOperand(0);
2693 return true;
2694 }
2695 } else if (ShAmt.getOpcode() == ISD::SUB &&
2696 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2697 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2698 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2699 // generate a NEG instead of a SUB of a constant.
2700 if (Imm != 0 && Imm % ShiftWidth == 0) {
2701 SDLoc DL(ShAmt);
2702 EVT VT = ShAmt.getValueType();
2703 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2704 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2705 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2706 ShAmt.getOperand(1));
2707 ShAmt = SDValue(Neg, 0);
2708 return true;
2709 }
2710 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2711 // to generate a NOT instead of a SUB of a constant.
2712 if (Imm % ShiftWidth == ShiftWidth - 1) {
2713 SDLoc DL(ShAmt);
2714 EVT VT = ShAmt.getValueType();
2715 MachineSDNode *Not =
2716 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2717 CurDAG->getTargetConstant(-1, DL, VT));
2718 ShAmt = SDValue(Not, 0);
2719 return true;
2720 }
2721 }
2722
2723 return true;
2724}
2725
2726/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2727/// check for equality with 0. This function emits instructions that convert the
2728/// seteq/setne into something that can be compared with 0.
2729/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2730/// ISD::SETNE).
2731bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2732 SDValue &Val) {
2733 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2734 "Unexpected condition code!");
2735
2736 // We're looking for a setcc.
2737 if (N->getOpcode() != ISD::SETCC)
2738 return false;
2739
2740 // Must be an equality comparison.
2741 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2742 if (CCVal != ExpectedCCVal)
2743 return false;
2744
2745 SDValue LHS = N->getOperand(0);
2746 SDValue RHS = N->getOperand(1);
2747
2748 if (!LHS.getValueType().isScalarInteger())
2749 return false;
2750
2751 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2752 if (isNullConstant(RHS)) {
2753 Val = LHS;
2754 return true;
2755 }
2756
2757 SDLoc DL(N);
2758
2759 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2760 int64_t CVal = C->getSExtValue();
2761 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2762 // non-zero otherwise.
2763 if (CVal == -2048) {
2764 Val =
2765 SDValue(CurDAG->getMachineNode(
2766 RISCV::XORI, DL, N->getValueType(0), LHS,
2767 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2768 0);
2769 return true;
2770 }
2771 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2772 // LHS is equal to the RHS and non-zero otherwise.
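// For example, (seteq a, 7) becomes (addi a, -7), and downstream users then
// test the result against zero.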
2773 if (isInt<12>(CVal) || CVal == 2048) {
2774 Val =
2775 SDValue(CurDAG->getMachineNode(
2776 RISCV::ADDI, DL, N->getValueType(0), LHS,
2777 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2778 0);
2779 return true;
2780 }
2781 }
2782
2783 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2784 // equal and a non-zero value if they aren't.
2785 Val = SDValue(
2786 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2787 return true;
2788}
2789
2790bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2791 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2792 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2793 Val = N.getOperand(0);
2794 return true;
2795 }
2796
2797 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2798 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2799 return N;
2800
2801 SDValue N0 = N.getOperand(0);
2802 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2803 N.getConstantOperandVal(1) == ShiftAmt &&
2804 N0.getConstantOperandVal(1) == ShiftAmt)
2805 return N0.getOperand(0);
2806
2807 return N;
2808 };
2809
2810 MVT VT = N.getSimpleValueType();
2811 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2812 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2813 return true;
2814 }
2815
2816 return false;
2817}
2818
2819bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2820 if (N.getOpcode() == ISD::AND) {
2821 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2822 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2823 Val = N.getOperand(0);
2824 return true;
2825 }
2826 }
2827 MVT VT = N.getSimpleValueType();
2828 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2829 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2830 Val = N;
2831 return true;
2832 }
2833
2834 return false;
2835}
2836
2837/// Look for various patterns that can be done with a SHL that can be folded
2838/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2839/// SHXADD we are trying to match.
2840bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2841 SDValue &Val) {
2842 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2843 SDValue N0 = N.getOperand(0);
2844
2845 bool LeftShift = N0.getOpcode() == ISD::SHL;
2846 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2847 isa<ConstantSDNode>(N0.getOperand(1))) {
2848 uint64_t Mask = N.getConstantOperandVal(1);
2849 unsigned C2 = N0.getConstantOperandVal(1);
2850
2851 unsigned XLen = Subtarget->getXLen();
2852 if (LeftShift)
2853 Mask &= maskTrailingZeros<uint64_t>(C2);
2854 else
2855 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2856
2857 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2858 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2859 // followed by a SHXADD with c3 for the X amount.
2860 if (isShiftedMask_64(Mask)) {
2861 unsigned Leading = XLen - llvm::bit_width(Mask);
2862 unsigned Trailing = llvm::countr_zero(Mask);
2863 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2864 SDLoc DL(N);
2865 EVT VT = N.getValueType();
2866 Val = SDValue(CurDAG->getMachineNode(
2867 RISCV::SRLI, DL, VT, N0.getOperand(0),
2868 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2869 0);
2870 return true;
2871 }
2872 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2873 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2874 // followed by a SHXADD using c3 for the X amount.
2875 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2876 SDLoc DL(N);
2877 EVT VT = N.getValueType();
2878 Val = SDValue(
2879 CurDAG->getMachineNode(
2880 RISCV::SRLI, DL, VT, N0.getOperand(0),
2881 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2882 0);
2883 return true;
2884 }
2885 }
2886 }
2887 }
2888
2889 bool LeftShift = N.getOpcode() == ISD::SHL;
2890 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2891 isa<ConstantSDNode>(N.getOperand(1))) {
2892 SDValue N0 = N.getOperand(0);
2893 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2894 isa<ConstantSDNode>(N0.getOperand(1))) {
2895 uint64_t Mask = N0.getConstantOperandVal(1);
2896 if (isShiftedMask_64(Mask)) {
2897 unsigned C1 = N.getConstantOperandVal(1);
2898 unsigned XLen = Subtarget->getXLen();
2899 unsigned Leading = XLen - llvm::bit_width(Mask);
2900 unsigned Trailing = llvm::countr_zero(Mask);
2901 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2902 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2903 if (LeftShift && Leading == 32 && Trailing > 0 &&
2904 (Trailing + C1) == ShAmt) {
2905 SDLoc DL(N);
2906 EVT VT = N.getValueType();
2907 Val = SDValue(CurDAG->getMachineNode(
2908 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2909 CurDAG->getTargetConstant(Trailing, DL, VT)),
2910 0);
2911 return true;
2912 }
2913 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2914 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2915 if (!LeftShift && Leading == 32 && Trailing > C1 &&
2916 (Trailing - C1) == ShAmt) {
2917 SDLoc DL(N);
2918 EVT VT = N.getValueType();
2919 Val = SDValue(CurDAG->getMachineNode(
2920 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2921 CurDAG->getTargetConstant(Trailing, DL, VT)),
2922 0);
2923 return true;
2924 }
2925 }
2926 }
2927 }
2928
2929 return false;
2930}
2931
2932/// Look for various patterns that can be done with a SHL that can be folded
2933/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2934/// SHXADD_UW we are trying to match.
2935bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
2936 SDValue &Val) {
2937 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2938 N.hasOneUse()) {
2939 SDValue N0 = N.getOperand(0);
2940 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2941 N0.hasOneUse()) {
2942 uint64_t Mask = N.getConstantOperandVal(1);
2943 unsigned C2 = N0.getConstantOperandVal(1);
2944
2945 Mask &= maskTrailingZeros<uint64_t>(C2);
2946
2947 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2948 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2949 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2950 if (isShiftedMask_64(Mask)) {
2951 unsigned Leading = llvm::countl_zero(Mask);
2952 unsigned Trailing = llvm::countr_zero(Mask);
2953 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2954 SDLoc DL(N);
2955 EVT VT = N.getValueType();
2956 Val = SDValue(CurDAG->getMachineNode(
2957 RISCV::SLLI, DL, VT, N0.getOperand(0),
2958 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2959 0);
2960 return true;
2961 }
2962 }
2963 }
2964 }
2965
2966 return false;
2967}
2968
2969static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
2970 unsigned Bits,
2971 const TargetInstrInfo *TII) {
2972 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
2973
2974 if (!MCOpcode)
2975 return false;
2976
2977 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
2978 const uint64_t TSFlags = MCID.TSFlags;
2979 if (!RISCVII::hasSEWOp(TSFlags))
2980 return false;
2981 assert(RISCVII::hasVLOp(TSFlags));
2982
2983 bool HasGlueOp = User->getGluedNode() != nullptr;
2984 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
2985 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
2986 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
2987 unsigned VLIdx =
2988 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
2989 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
2990
2991 if (UserOpNo == VLIdx)
2992 return false;
2993
2994 auto NumDemandedBits =
2995 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
2996 return NumDemandedBits && Bits >= *NumDemandedBits;
2997}
2998
2999// Return true if all users of this SDNode* only consume the lower \p Bits.
3000// This can be used to form W instructions for add/sub/mul/shl even when the
3001// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3002// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3003// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3004// the add/sub/mul/shl to become non-W instructions. By checking the users we
3005// may be able to use a W instruction and CSE with the other instruction if
3006// this has happened. We could try to detect that the CSE opportunity exists
3007// before doing this, but that would be more complicated.
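// For example, an i64 ADD whose only uses are the data operand of an SW store
// and an ADDW consumes just its low 32 bits, so it can itself be selected as
// ADDW and potentially CSE with the existing ADDW.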
3008bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3009 const unsigned Depth) const {
3010 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3011 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3012 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3013 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3014 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3015 isa<ConstantSDNode>(Node) || Depth != 0) &&
3016 "Unexpected opcode");
3017
3018 if (Depth >= SelectionDAG::MaxRecursionDepth)
3019 return false;
3020
3021 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3022 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3023 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3024 return false;
3025
3026 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3027 SDNode *User = *UI;
3028 // Users of this node should have already been instruction selected
3029 if (!User->isMachineOpcode())
3030 return false;
3031
3032 // TODO: Add more opcodes?
3033 switch (User->getMachineOpcode()) {
3034 default:
3035 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3036 break;
3037 return false;
3038 case RISCV::ADDW:
3039 case RISCV::ADDIW:
3040 case RISCV::SUBW:
3041 case RISCV::MULW:
3042 case RISCV::SLLW:
3043 case RISCV::SLLIW:
3044 case RISCV::SRAW:
3045 case RISCV::SRAIW:
3046 case RISCV::SRLW:
3047 case RISCV::SRLIW:
3048 case RISCV::DIVW:
3049 case RISCV::DIVUW:
3050 case RISCV::REMW:
3051 case RISCV::REMUW:
3052 case RISCV::ROLW:
3053 case RISCV::RORW:
3054 case RISCV::RORIW:
3055 case RISCV::CLZW:
3056 case RISCV::CTZW:
3057 case RISCV::CPOPW:
3058 case RISCV::SLLI_UW:
3059 case RISCV::FMV_W_X:
3060 case RISCV::FCVT_H_W:
3061 case RISCV::FCVT_H_WU:
3062 case RISCV::FCVT_S_W:
3063 case RISCV::FCVT_S_WU:
3064 case RISCV::FCVT_D_W:
3065 case RISCV::FCVT_D_WU:
3066 case RISCV::TH_REVW:
3067 case RISCV::TH_SRRIW:
3068 if (Bits < 32)
3069 return false;
3070 break;
3071 case RISCV::SLL:
3072 case RISCV::SRA:
3073 case RISCV::SRL:
3074 case RISCV::ROL:
3075 case RISCV::ROR:
3076 case RISCV::BSET:
3077 case RISCV::BCLR:
3078 case RISCV::BINV:
3079 // Shift amount operands only use log2(Xlen) bits.
3080 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
3081 return false;
3082 break;
3083 case RISCV::SLLI:
3084 // SLLI only uses the lower (XLen - ShAmt) bits.
3085 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
3086 return false;
3087 break;
3088 case RISCV::ANDI:
3089 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3090 break;
3091 goto RecCheck;
3092 case RISCV::ORI: {
3093 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3094 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3095 break;
3096 [[fallthrough]];
3097 }
3098 case RISCV::AND:
3099 case RISCV::OR:
3100 case RISCV::XOR:
3101 case RISCV::XORI:
3102 case RISCV::ANDN:
3103 case RISCV::ORN:
3104 case RISCV::XNOR:
3105 case RISCV::SH1ADD:
3106 case RISCV::SH2ADD:
3107 case RISCV::SH3ADD:
3108 RecCheck:
3109 if (hasAllNBitUsers(User, Bits, Depth + 1))
3110 break;
3111 return false;
3112 case RISCV::SRLI: {
3113 unsigned ShAmt = User->getConstantOperandVal(1);
3114 // If we are shifting right by less than Bits, and users don't demand any
3115 // bits that were shifted into [Bits-1:0], then we can consider this as an
3116 // N-Bit user.
3117 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3118 break;
3119 return false;
3120 }
3121 case RISCV::SEXT_B:
3122 case RISCV::PACKH:
3123 if (Bits < 8)
3124 return false;
3125 break;
3126 case RISCV::SEXT_H:
3127 case RISCV::FMV_H_X:
3128 case RISCV::ZEXT_H_RV32:
3129 case RISCV::ZEXT_H_RV64:
3130 case RISCV::PACKW:
3131 if (Bits < 16)
3132 return false;
3133 break;
3134 case RISCV::PACK:
3135 if (Bits < (Subtarget->getXLen() / 2))
3136 return false;
3137 break;
3138 case RISCV::ADD_UW:
3139 case RISCV::SH1ADD_UW:
3140 case RISCV::SH2ADD_UW:
3141 case RISCV::SH3ADD_UW:
3142 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3143 // 32 bits.
3144 if (UI.getOperandNo() != 0 || Bits < 32)
3145 return false;
3146 break;
3147 case RISCV::SB:
3148 if (UI.getOperandNo() != 0 || Bits < 8)
3149 return false;
3150 break;
3151 case RISCV::SH:
3152 if (UI.getOperandNo() != 0 || Bits < 16)
3153 return false;
3154 break;
3155 case RISCV::SW:
3156 if (UI.getOperandNo() != 0 || Bits < 32)
3157 return false;
3158 break;
3159 }
3160 }
3161
3162 return true;
3163}
3164
3165// Select a constant that can be represented as (sign_extend(imm5) << imm2).
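// For example, 96 is encoded as Simm5 = 12 with Shl2 = 3 (12 << 3), while 100
// cannot be encoded because no shift amount in [0, 3] leaves a simm5.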
3166bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3167 SDValue &Shl2) {
3168 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3169 int64_t Offset = C->getSExtValue();
3170 int64_t Shift;
3171 for (Shift = 0; Shift < 4; Shift++)
3172 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3173 break;
3174
3175 // Constant cannot be encoded.
3176 if (Shift == 4)
3177 return false;
3178
3179 EVT Ty = N->getValueType(0);
3180 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3181 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3182 return true;
3183 }
3184
3185 return false;
3186}
3187
3188// Select VL as a 5 bit immediate or a value that will become a register. This
3189// allows us to choose betwen VSETIVLI or VSETVLI later.
3191 auto *C = dyn_cast<ConstantSDNode>(N);
3192 if (C && isUInt<5>(C->getZExtValue())) {
3193 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3194 N->getValueType(0));
3195 } else if (C && C->isAllOnes()) {
3196 // Treat all ones as VLMax.
3197 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3198 N->getValueType(0));
3199 } else if (isa<RegisterSDNode>(N) &&
3200 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3201 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3202 // as the register class. Convert X0 to a special immediate to pass the
3203 // MachineVerifier. This is recognized specially by the vsetvli insertion
3204 // pass.
3205 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3206 N->getValueType(0));
3207 } else {
3208 VL = N;
3209 }
3210
3211 return true;
3212}
3213
3214static SDValue findVSplat(SDValue N) {
3215 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3216 if (!N.getOperand(0).isUndef())
3217 return SDValue();
3218 N = N.getOperand(1);
3219 }
3220 SDValue Splat = N;
3221 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3222 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3223 !Splat.getOperand(0).isUndef())
3224 return SDValue();
3225 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3226 return Splat;
3227}
3228
3229bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3230 SDValue Splat = findVSplat(N);
3231 if (!Splat)
3232 return false;
3233
3234 SplatVal = Splat.getOperand(1);
3235 return true;
3236}
3237
3238static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3239 SelectionDAG &DAG,
3240 const RISCVSubtarget &Subtarget,
3241 std::function<bool(int64_t)> ValidateImm) {
3242 SDValue Splat = findVSplat(N);
3243 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3244 return false;
3245
3246 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3247 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3248 "Unexpected splat operand type");
3249
3250 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3251 // type is wider than the resulting vector element type: an implicit
3252 // truncation first takes place. Therefore, perform a manual
3253 // truncation/sign-extension in order to ignore any truncated bits and catch
3254 // any zero-extended immediate.
3255 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3256 // sign-extending to (XLenVT -1).
3257 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3258
3259 int64_t SplatImm = SplatConst.getSExtValue();
3260
3261 if (!ValidateImm(SplatImm))
3262 return false;
3263
3264 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3265 return true;
3266}
3267
3268bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3269 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3270 [](int64_t Imm) { return isInt<5>(Imm); });
3271}
3272
3273bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3274 return selectVSplatImmHelper(
3275 N, SplatVal, *CurDAG, *Subtarget,
3276 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3277}
3278
3279bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3280 SDValue &SplatVal) {
3281 return selectVSplatImmHelper(
3282 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3283 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3284 });
3285}
3286
3287bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3288 SDValue &SplatVal) {
3289 return selectVSplatImmHelper(
3290 N, SplatVal, *CurDAG, *Subtarget,
3291 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3292}
3293
3294bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3295 auto IsExtOrTrunc = [](SDValue N) {
3296 switch (N->getOpcode()) {
3297 case ISD::SIGN_EXTEND:
3298 case ISD::ZERO_EXTEND:
3299 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3300 // inactive elements will be undef.
3301 case RISCVISD::TRUNCATE_VECTOR_VL:
3302 case RISCVISD::VSEXT_VL:
3303 case RISCVISD::VZEXT_VL:
3304 return true;
3305 default:
3306 return false;
3307 }
3308 };
3309
3310 // We can have multiple nested nodes, so unravel them all if needed.
3311 while (IsExtOrTrunc(N)) {
3312 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3313 return false;
3314 N = N->getOperand(0);
3315 }
3316
3317 return selectVSplat(N, SplatVal);
3318}
3319
3320bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3321 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3322 if (!CFP)
3323 return false;
3324 const APFloat &APF = CFP->getValueAPF();
3325 // td can handle +0.0 already.
3326 if (APF.isPosZero())
3327 return false;
3328
3329 MVT VT = CFP->getSimpleValueType(0);
3330
3331 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3332 // the returned pair is true) we still prefer FLI + FNEG over immediate
3333 // materialization as the latter might generate a longer instruction sequence.
3334 if (static_cast<const RISCVTargetLowering *>(TLI)
3335 ->getLegalZfaFPImm(APF, VT)
3336 .first >= 0)
3337 return false;
3338
3339 MVT XLenVT = Subtarget->getXLenVT();
3340 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3341 assert(APF.isNegZero() && "Unexpected constant.");
3342 return false;
3343 }
3344 SDLoc DL(N);
3345 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3346 *Subtarget);
3347 return true;
3348}
3349
3350bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3351 SDValue &Imm) {
3352 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3353 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3354
3355 if (!isInt<5>(ImmVal))
3356 return false;
3357
3358 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3359 return true;
3360 }
3361
3362 return false;
3363}
3364
3365// Try to remove sext.w if the input is a W instruction or can be made into
3366// a W instruction cheaply.
3367bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3368 // Look for the sext.w pattern, addiw rd, rs1, 0.
3369 if (N->getMachineOpcode() != RISCV::ADDIW ||
3370 !isNullConstant(N->getOperand(1)))
3371 return false;
3372
3373 SDValue N0 = N->getOperand(0);
3374 if (!N0.isMachineOpcode())
3375 return false;
3376
3377 switch (N0.getMachineOpcode()) {
3378 default:
3379 break;
3380 case RISCV::ADD:
3381 case RISCV::ADDI:
3382 case RISCV::SUB:
3383 case RISCV::MUL:
3384 case RISCV::SLLI: {
3385 // Convert sext.w+add/sub/mul to their W instructions. This will create
3386 // a new independent instruction. This improves latency.
3387 unsigned Opc;
3388 switch (N0.getMachineOpcode()) {
3389 default:
3390 llvm_unreachable("Unexpected opcode!");
3391 case RISCV::ADD: Opc = RISCV::ADDW; break;
3392 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3393 case RISCV::SUB: Opc = RISCV::SUBW; break;
3394 case RISCV::MUL: Opc = RISCV::MULW; break;
3395 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3396 }
3397
3398 SDValue N00 = N0.getOperand(0);
3399 SDValue N01 = N0.getOperand(1);
3400
3401 // Shift amount needs to be uimm5.
3402 if (N0.getMachineOpcode() == RISCV::SLLI &&
3403 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3404 break;
3405
3406 SDNode *Result =
3407 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3408 N00, N01);
3409 ReplaceUses(N, Result);
3410 return true;
3411 }
3412 case RISCV::ADDW:
3413 case RISCV::ADDIW:
3414 case RISCV::SUBW:
3415 case RISCV::MULW:
3416 case RISCV::SLLIW:
3417 case RISCV::PACKW:
3418 case RISCV::TH_MULAW:
3419 case RISCV::TH_MULAH:
3420 case RISCV::TH_MULSW:
3421 case RISCV::TH_MULSH:
3422 if (N0.getValueType() == MVT::i32)
3423 break;
3424
3425 // Result is already sign extended; just remove the sext.w.
3426 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3427 ReplaceUses(N, N0.getNode());
3428 return true;
3429 }
3430
3431 return false;
3432}
3433
3434static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3435 // Check that we're using V0 as a mask register.
3436 if (!isa<RegisterSDNode>(MaskOp) ||
3437 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3438 return false;
3439
3440 // The glued user defines V0.
3441 const auto *Glued = GlueOp.getNode();
3442
3443 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3444 return false;
3445
3446 // Check that we're defining V0 as a mask register.
3447 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3448 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3449 return false;
3450
3451 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3452 SDValue MaskSetter = Glued->getOperand(2);
3453
3454 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3455 // from an extract_subvector or insert_subvector.
3456 if (MaskSetter->isMachineOpcode() &&
3457 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3458 MaskSetter = MaskSetter->getOperand(0);
3459
3460 const auto IsVMSet = [](unsigned Opc) {
3461 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3462 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3463 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3464 Opc == RISCV::PseudoVMSET_M_B8;
3465 };
3466
3467 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3468 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3469 // assume that it's all-ones? Same applies to its VL.
3470 return MaskSetter->isMachineOpcode() &&
3471 IsVMSet(MaskSetter.getMachineOpcode());
3472}
3473
3474// Return true if we can make sure mask of N is all-ones mask.
3475static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3476 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3477 N->getOperand(N->getNumOperands() - 1));
3478}
3479
3480static bool isImplicitDef(SDValue V) {
3481 return V.isMachineOpcode() &&
3482 V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3483}
3484
3485// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3486// corresponding "unmasked" pseudo versions. The mask we're interested in will
3487// take the form of a V0 physical register operand, with a glued
3488// register-setting instruction.
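// For example (editorial illustration; operand lists are abbreviated and the
// pseudo names are representative rather than copied from the tables):
//   %ones = PseudoVMSET_M_B8 %vl, %sew
//   $v0 = CopyToReg %ones              ; glued to the masked pseudo
//   %r = PseudoVADD_VV_M1_MASK %passthru, %a, %b, $v0, %vl, %sew, %policy
// becomes
//   %r = PseudoVADD_VV_M1 %passthru, %a, %b, %vl, %sew, %policy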
3489bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3490 const RISCV::RISCVMaskedPseudoInfo *I =
3491 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3492 if (!I)
3493 return false;
3494
3495 unsigned MaskOpIdx = I->MaskOpIdx;
3496 if (!usesAllOnesMask(N, MaskOpIdx))
3497 return false;
3498
3499 // There are two classes of pseudos in the table - compares and
3500 // everything else. See the comment on RISCVMaskedPseudo for details.
3501 const unsigned Opc = I->UnmaskedPseudo;
3502 const MCInstrDesc &MCID = TII->get(Opc);
3503 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3504#ifndef NDEBUG
3505 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3508 "Masked and unmasked pseudos are inconsistent");
3509 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3510 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3511#endif
3512
3513 SmallVector<SDValue, 8> Ops;
3514 // Skip the merge operand at index 0 if !UseTUPseudo.
3515 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3516 // Skip the mask, and the Glue.
3517 SDValue Op = N->getOperand(I);
3518 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3519 continue;
3520 Ops.push_back(Op);
3521 }
3522
3523 // Transitively apply any node glued to our new node.
3524 const auto *Glued = N->getGluedNode();
3525 if (auto *TGlued = Glued->getGluedNode())
3526 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3527
3528 MachineSDNode *Result =
3529 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3530
3531 if (!N->memoperands_empty())
3532 CurDAG->setNodeMemRefs(Result, N->memoperands());
3533
3534 Result->setFlags(N->getFlags());
3535 ReplaceUses(N, Result);
3536
3537 return true;
3538}
3539
3540static bool IsVMerge(SDNode *N) {
3541 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3542}
3543
3544static bool IsVMv(SDNode *N) {
3545 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3546}
3547
3548static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3549 switch (LMUL) {
3550 case RISCVII::LMUL_F8:
3551 return RISCV::PseudoVMSET_M_B1;
3552 case RISCVII::LMUL_F4:
3553 return RISCV::PseudoVMSET_M_B2;
3554 case RISCVII::LMUL_F2:
3555 return RISCV::PseudoVMSET_M_B4;
3556 case RISCVII::LMUL_1:
3557 return RISCV::PseudoVMSET_M_B8;
3558 case RISCVII::LMUL_2:
3559 return RISCV::PseudoVMSET_M_B16;
3560 case RISCVII::LMUL_4:
3561 return RISCV::PseudoVMSET_M_B32;
3562 case RISCVII::LMUL_8:
3563 return RISCV::PseudoVMSET_M_B64;
3564 case RISCVII::LMUL_RESERVED:
3565 llvm_unreachable("Unexpected LMUL");
3566 }
3567 llvm_unreachable("Unknown VLMUL enum");
3568}
3569
3570// Try to fold away VMERGE_VVM instructions into their true operands:
3571//
3572// %true = PseudoVADD_VV ...
3573// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3574// ->
3575// %x = PseudoVADD_VV_MASK %false, ..., %mask
3576//
3577// We can only fold if vmerge's merge operand, vmerge's false operand and
3578// %true's merge operand (if it has one) are the same. This is because we have
3579// to consolidate them into one merge operand in the result.
3580//
3581// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3582// mask is all ones.
3583//
3584// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3585// VMERGE_VVM with an all ones mask.
3586//
3587// The resulting VL is the minimum of the two VLs.
3588//
3589// The resulting policy is the effective policy the vmerge would have had,
3590// i.e. whether or not its merge operand was implicit-def.
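//
// For example, the vmv.v.v case (editorial illustration, operands abbreviated):
//
// %true = PseudoVADD_VV ...
// %x = PseudoVMV_V_V %passthru, %true, %vl
// ->
// %x = PseudoVADD_VV_MASK %passthru, ..., %all-ones-mask, %vl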
3591bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3592 SDValue Merge, False, True, VL, Mask, Glue;
3593 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3594 if (IsVMv(N)) {
3595 Merge = N->getOperand(0);
3596 False = N->getOperand(0);
3597 True = N->getOperand(1);
3598 VL = N->getOperand(2);
3599 // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3600 // mask later below.
3601 } else {
3602 assert(IsVMerge(N));
3603 Merge = N->getOperand(0);
3604 False = N->getOperand(1);
3605 True = N->getOperand(2);
3606 Mask = N->getOperand(3);
3607 VL = N->getOperand(4);
3608 // We always have a glue node for the mask at v0.
3609 Glue = N->getOperand(N->getNumOperands() - 1);
3610 }
3611 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3612 assert(!Glue || Glue.getValueType() == MVT::Glue);
3613
3614 // We require that either merge and false are the same, or that merge
3615 // is undefined.
3616 if (Merge != False && !isImplicitDef(Merge))
3617 return false;
3618
3619 assert(True.getResNo() == 0 &&
3620 "Expect True is the first output of an instruction.");
3621
3622 // N must be the only user of True.
3623 if (!True.hasOneUse())
3624 return false;
3625
3626 if (!True.isMachineOpcode())
3627 return false;
3628
3629 unsigned TrueOpc = True.getMachineOpcode();
3630 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3631 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3632 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3633
3634 bool IsMasked = false;
3635 const RISCV::RISCVMaskedPseudoInfo *Info =
3636 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3637 if (!Info && HasTiedDest) {
3638 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3639 IsMasked = true;
3640 }
3641
3642 if (!Info)
3643 return false;
3644
3645 // When Mask is not a true mask, this transformation is illegal for some
3646 // operations whose results are affected by mask, like viota.m.
3647 if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
3648 return false;
3649
3650 // If True has a merge operand then it needs to be the same as vmerge's False,
3651 // since False will be used for the result's merge operand.
3652 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3653 // The vmerge instruction must be TU.
3654 // FIXME: This could be relaxed, but we need to handle the policy for the
3655 // resulting op correctly.
3656 if (isImplicitDef(Merge))
3657 return false;
3658 SDValue MergeOpTrue = True->getOperand(0);
3659 if (False != MergeOpTrue)
3660 return false;
3661 }
3662
3663 // If True is masked then the vmerge must have an all 1s mask, since we're
3664 // going to keep the mask from True.
3665 if (IsMasked) {
3666 assert(HasTiedDest && "Expected tied dest");
3667 // The vmerge instruction must be TU.
3668 if (isImplicitDef(Merge))
3669 return false;
3670 // FIXME: Support mask agnostic True instruction which would have an
3671 // undef merge operand.
3672 if (Mask && !usesAllOnesMask(Mask, Glue))
3673 return false;
3674 }
3675
3676 // Skip if True has side effect.
3677 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3678 return false;
3679
3680 // The last operand of a masked instruction may be glued.
3681 bool HasGlueOp = True->getGluedNode() != nullptr;
3682
3683 // The chain operand may exist either before the glued operands or in the last
3684 // position.
3685 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3686 bool HasChainOp =
3687 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3688
3689 if (HasChainOp) {
3690 // Avoid creating cycles in the DAG. We must ensure that none of the other
3691 // operands depend on True through its Chain.
3692 SmallVector<const SDNode *, 4> LoopWorklist;
3693 SmallPtrSet<const SDNode *, 16> Visited;
3694 LoopWorklist.push_back(False.getNode());
3695 if (Mask)
3696 LoopWorklist.push_back(Mask.getNode());
3697 LoopWorklist.push_back(VL.getNode());
3698 if (Glue)
3699 LoopWorklist.push_back(Glue.getNode());
3700 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3701 return false;
3702 }
3703
3704 // The vector policy operand may be present for masked intrinsics
3705 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3706 unsigned TrueVLIndex =
3707 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3708 SDValue TrueVL = True.getOperand(TrueVLIndex);
3709 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3710
3711 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3712 if (LHS == RHS)
3713 return LHS;
3714 if (isAllOnesConstant(LHS))
3715 return RHS;
3716 if (isAllOnesConstant(RHS))
3717 return LHS;
3718 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3719 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3720 if (!CLHS || !CRHS)
3721 return SDValue();
3722 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3723 };
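// Editorial note (not from the original source), illustrating the rule above:
// GetMinVL(VLMAX sentinel, %avl) == %avl, GetMinVL(4, 8) == 4, and two
// different non-constant VLs yield a null SDValue, which rejects the fold.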
3724
3725 // Because N and True must have the same merge operand (or True's operand is
3726 // implicit_def), the "effective" body is the minimum of their VLs.
3727 SDValue OrigVL = VL;
3728 VL = GetMinVL(TrueVL, VL);
3729 if (!VL)
3730 return false;
3731
3732 // If we end up changing the VL or mask of True, then we need to make sure it
3733 // doesn't raise any observable fp exceptions, since changing the active
3734 // elements will affect how fflags is set.
3735 if (TrueVL != VL || !IsMasked)
3736 if (mayRaiseFPException(True.getNode()) &&
3737 !True->getFlags().hasNoFPExcept())
3738 return false;
3739
3740 SDLoc DL(N);
3741
3742 // From the preconditions we checked above, we know the mask and thus glue
3743 // for the result node will be taken from True.
3744 if (IsMasked) {
3745 Mask = True->getOperand(Info->MaskOpIdx);
3746 Glue = True->getOperand(True->getNumOperands() - 1);
3747 assert(Glue.getValueType() == MVT::Glue);
3748 }
3749 // If we end up using the vmerge's mask but the vmerge is actually a
3750 // vmv.v.v (which has no mask), create an all-ones mask to use.
3751 else if (IsVMv(N)) {
3752 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3753 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3754 ElementCount EC = N->getValueType(0).getVectorElementCount();
3755 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3756
3757 SDValue AllOnesMask =
3758 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3759 SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3760 RISCV::V0, AllOnesMask, SDValue());
3761 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3762 Glue = MaskCopy.getValue(1);
3763 }
3764
3765 unsigned MaskedOpc = Info->MaskedPseudo;
3766#ifndef NDEBUG
3767 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3769 "Expected instructions with mask have policy operand.");
3770 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3771 MCOI::TIED_TO) == 0 &&
3772 "Expected instructions with mask have a tied dest.");
3773#endif
3774
3775 // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3776 // operand is undefined.
3777 //
3778 // However, if the VL became smaller than what the vmerge had originally, then
3779 // elements past VL that were previously in the vmerge's body will have moved
3780 // to the tail. In that case we always need to use tail undisturbed to
3781 // preserve them.
3782 bool MergeVLShrunk = VL != OrigVL;
3783 uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3784 ? RISCVII::TAIL_AGNOSTIC
3785 : /*TUMU*/ 0;
3786 SDValue PolicyOp =
3787 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3788
3789
3790 SmallVector<SDValue, 8> Ops;
3791 Ops.push_back(False);
3792
3793 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3794 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3795 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3796 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3797
3798 Ops.push_back(Mask);
3799
3800 // For unmasked "VOp" with rounding mode operand, that is interfaces like
3801 // (..., rm, vl) or (..., rm, vl, policy).
3802 // Its masked version is (..., vm, rm, vl, policy).
3803 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3804 if (HasRoundingMode)
3805 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3806
3807 Ops.append({VL, SEW, PolicyOp});
3808
3809 // Result node should have chain operand of True.
3810 if (HasChainOp)
3811 Ops.push_back(True.getOperand(TrueChainOpIdx));
3812
3813 // Add the glue for the CopyToReg of mask->v0.
3814 Ops.push_back(Glue);
3815
3816 MachineSDNode *Result =
3817 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3818 Result->setFlags(True->getFlags());
3819
3820 if (!cast<MachineSDNode>(True)->memoperands_empty())
3821 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3822
3823 // Replace vmerge.vvm node by Result.
3824 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3825
3826 // Replace the other values of True, e.g. its chain and VL.
3827 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3828 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3829
3830 return true;
3831}
3832
3833bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3834 bool MadeChange = false;
3835 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3836
3837 while (Position != CurDAG->allnodes_begin()) {
3838 SDNode *N = &*--Position;
3839 if (N->use_empty() || !N->isMachineOpcode())
3840 continue;
3841
3842 if (IsVMerge(N) || IsVMv(N))
3843 MadeChange |= performCombineVMergeAndVOps(N);
3844 }
3845 return MadeChange;
3846}
3847
3848/// If our passthru is an implicit_def, use noreg instead. This side
3849/// steps issues with MachineCSE not being able to CSE expressions with
3850/// IMPLICIT_DEF operands while preserving the semantic intent. See
3851/// pr64282 for context. Note that this transform is the last one
3852/// performed at ISEL DAG to DAG.
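/// For example (editorial illustration, operands abbreviated):
///   %passthru = IMPLICIT_DEF
///   %r = PseudoVADD_VV_M1 %passthru, %a, %b, %vl, %sew, %policy
/// becomes
///   %r = PseudoVADD_VV_M1 $noreg, %a, %b, %vl, %sew, %policy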
3853bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3854 bool MadeChange = false;
3855 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3856
3857 while (Position != CurDAG->allnodes_begin()) {
3858 SDNode *N = &*--Position;
3859 if (N->use_empty() || !N->isMachineOpcode())
3860 continue;
3861
3862 const unsigned Opc = N->getMachineOpcode();
3863 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3864 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3865 !isImplicitDef(N->getOperand(0)))
3866 continue;
3867
3868 SmallVector<SDValue, 8> Ops;
3869 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3870 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3871 SDValue Op = N->getOperand(I);
3872 Ops.push_back(Op);
3873 }
3874
3875 MachineSDNode *Result =
3876 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3877 Result->setFlags(N->getFlags());
3878 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3879 ReplaceUses(N, Result);
3880 MadeChange = true;
3881 }
3882 return MadeChange;
3883}
3884
3885
3886// This pass converts a legalized DAG into a RISCV-specific DAG, ready
3887// for instruction scheduling.
3888FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3889 CodeGenOptLevel OptLevel) {
3890 return new RISCVDAGToDAGISel(TM, OptLevel);
3891}
3892
3893char RISCVDAGToDAGISel::ID = 0;
3894