1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
22#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#include "RISCVGenSearchableTables.inc"
47} // namespace llvm::RISCV
48
49void RISCVDAGToDAGISel::PreprocessISelDAG() {
50 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
51
52 bool MadeChange = false;
53 while (Position != CurDAG->allnodes_begin()) {
54 SDNode *N = &*--Position;
55 if (N->use_empty())
56 continue;
57
58 SDValue Result;
59 switch (N->getOpcode()) {
60 case ISD::SPLAT_VECTOR: {
61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63 MVT VT = N->getSimpleValueType(0);
64 unsigned Opc =
65 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66 SDLoc DL(N);
67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 SDValue Src = N->getOperand(0);
69 if (VT.isInteger())
70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71 N->getOperand(0));
72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73 break;
74 }
75 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77 // load. Done after lowering and combining so that we have a chance to
78 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
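      // For example (illustrative, not the literal emitted sequence): an i64
      // splat on RV32 roughly becomes
      //   sw lo, 0(slot); sw hi, 4(slot); vlse64.v vd, (slot), zero
      // where the x0 stride register makes every element read the same
      // 8-byte stack slot.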
79 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 MVT VT = N->getSimpleValueType(0);
81 SDValue Passthru = N->getOperand(0);
82 SDValue Lo = N->getOperand(1);
83 SDValue Hi = N->getOperand(2);
84 SDValue VL = N->getOperand(3);
85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87 "Unexpected VTs!");
88 MachineFunction &MF = CurDAG->getMachineFunction();
89 SDLoc DL(N);
90
91 // Create temporary stack for each expanding node.
92 SDValue StackSlot =
93 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
96
97 SDValue Chain = CurDAG->getEntryNode();
98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100 SDValue OffsetSlot =
101 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103 Align(8));
104
105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108 SDValue IntID =
109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110 SDValue Ops[] = {Chain,
111 IntID,
112 Passthru,
113 StackSlot,
114 CurDAG->getRegister(RISCV::X0, MVT::i64),
115 VL};
116
117 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118 MVT::i64, MPI, Align(8),
119 MachineMemOperand::MOLoad);
120 break;
121 }
122 }
123
124 if (Result) {
125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126 LLVM_DEBUG(N->dump(CurDAG));
127 LLVM_DEBUG(dbgs() << "\nNew: ");
128 LLVM_DEBUG(Result->dump(CurDAG));
129 LLVM_DEBUG(dbgs() << "\n");
130
131 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132 MadeChange = true;
133 }
134 }
135
136 if (MadeChange)
137 CurDAG->RemoveDeadNodes();
138}
139
140void RISCVDAGToDAGISel::PostprocessISelDAG() {
141 HandleSDNode Dummy(CurDAG->getRoot());
142 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
143
144 bool MadeChange = false;
145 while (Position != CurDAG->allnodes_begin()) {
146 SDNode *N = &*--Position;
147 // Skip dead nodes and any non-machine opcodes.
148 if (N->use_empty() || !N->isMachineOpcode())
149 continue;
150
151 MadeChange |= doPeepholeSExtW(N);
152
153 // FIXME: This is here only because the VMerge transform doesn't
154 // know how to handle masked true inputs. Once that has been moved
155 // to post-ISEL, this can be deleted as well.
156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157 }
158
159 CurDAG->setRoot(Dummy.getValue());
160
161 MadeChange |= doPeepholeMergeVVMFold();
162
163 // After we're done with everything else, convert IMPLICIT_DEF
164 // passthru operands to NoRegister. This is required to workaround
165 // an optimization deficiency in MachineCSE. This really should
166 // be merged back into each of the patterns (i.e. there's no good
167 // reason not to go directly to NoReg), but is being done this way
168 // to allow easy backporting.
169 MadeChange |= doPeepholeNoRegPassThru();
170
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
173}
174
175static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (const RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202}
203
204static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
207
208 // Use a rematerializable pseudo instruction for short sequences if enabled.
209 if (Seq.size() == 2 && UsePseudoMovImm)
210 return SDValue(
211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212 CurDAG->getTargetConstant(Imm, DL, VT)),
213 0);
214
215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216 // worst an LUI+ADDIW. This will require an extra register, but avoids a
217 // constant pool.
218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219 // low and high 32 bits are the same and bits 31 and 63 are set.
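  // For example (illustrative): 0x1234567812345678 has equal low and high
  // halves, so the low half can be built with LUI+ADDIW and then combined as
  //   slli  t1, t0, 32
  //   add   a0, t0, t1
  // which is shorter than materializing the full 64-bit constant directly.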
220 if (Seq.size() > 3) {
221 unsigned ShiftAmt, AddOpc;
222 RISCVMatInt::InstSeq SeqLo =
223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226
227 SDValue SLLI = SDValue(
228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230 0);
231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232 }
233 }
234
235 // Otherwise, use the original sequence.
236 return selectImmSeq(CurDAG, DL, VT, Seq);
237}
238
239static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
240 unsigned NF, RISCVII::VLMUL LMUL) {
241 static const unsigned M1TupleRegClassIDs[] = {
242 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
243 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
244 RISCV::VRN8M1RegClassID};
245 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
246 RISCV::VRN3M2RegClassID,
247 RISCV::VRN4M2RegClassID};
248
249 assert(Regs.size() >= 2 && Regs.size() <= 8);
250
251 unsigned RegClassID;
252 unsigned SubReg0;
253 switch (LMUL) {
254 default:
255 llvm_unreachable("Invalid LMUL.");
256 case RISCVII::VLMUL::LMUL_F8:
257 case RISCVII::VLMUL::LMUL_F4:
258 case RISCVII::VLMUL::LMUL_F2:
259 case RISCVII::VLMUL::LMUL_1:
260 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
261 "Unexpected subreg numbering");
262 SubReg0 = RISCV::sub_vrm1_0;
263 RegClassID = M1TupleRegClassIDs[NF - 2];
264 break;
265 case RISCVII::VLMUL::LMUL_2:
266 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
267 "Unexpected subreg numbering");
268 SubReg0 = RISCV::sub_vrm2_0;
269 RegClassID = M2TupleRegClassIDs[NF - 2];
270 break;
271 case RISCVII::VLMUL::LMUL_4:
272 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
273 "Unexpected subreg numbering");
274 SubReg0 = RISCV::sub_vrm4_0;
275 RegClassID = RISCV::VRN2M4RegClassID;
276 break;
277 }
278
279 SDLoc DL(Regs[0]);
280 SmallVector<SDValue, 8> Ops;
281
282 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
283
284 for (unsigned I = 0; I < Regs.size(); ++I) {
285 Ops.push_back(Regs[I]);
286 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
287 }
288 SDNode *N =
289 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
290 return SDValue(N, 0);
291}
292
293void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
294 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
295 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
296 bool IsLoad, MVT *IndexVT) {
297 SDValue Chain = Node->getOperand(0);
298 SDValue Glue;
299
300 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
301
302 if (IsStridedOrIndexed) {
303 Operands.push_back(Node->getOperand(CurOp++)); // Index.
304 if (IndexVT)
305 *IndexVT = Operands.back()->getSimpleValueType(0);
306 }
307
308 if (IsMasked) {
309 // Mask needs to be copied to V0.
310 SDValue Mask = Node->getOperand(CurOp++);
311 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
312 Glue = Chain.getValue(1);
313 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
314 }
315 SDValue VL;
316 selectVLOp(Node->getOperand(CurOp++), VL);
317 Operands.push_back(VL);
318
319 MVT XLenVT = Subtarget->getXLenVT();
320 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
321 Operands.push_back(SEWOp);
322
323 // At the IR layer, all the masked load intrinsics have policy operands,
324 // none of the others do. All have passthru operands. For our pseudos,
325 // all loads have policy operands.
326 if (IsLoad) {
327 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
328 if (IsMasked)
329 Policy = Node->getConstantOperandVal(CurOp++);
330 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
331 Operands.push_back(PolicyOp);
332 }
333
334 Operands.push_back(Chain); // Chain.
335 if (Glue)
336 Operands.push_back(Glue);
337}
338
339void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
340 bool IsStrided) {
341 SDLoc DL(Node);
342 unsigned NF = Node->getNumValues() - 1;
343 MVT VT = Node->getSimpleValueType(0);
344 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
345 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
346
347 unsigned CurOp = 2;
348 SmallVector<SDValue, 8> Operands;
349
350 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
351 Node->op_begin() + CurOp + NF);
352 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
353 Operands.push_back(Merge);
354 CurOp += NF;
355
356 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
357 Operands, /*IsLoad=*/true);
358
359 const RISCV::VLSEGPseudo *P =
360 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
361 static_cast<unsigned>(LMUL));
362 MachineSDNode *Load =
363 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
364
365 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
366 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
367
368 SDValue SuperReg = SDValue(Load, 0);
369 for (unsigned I = 0; I < NF; ++I) {
370 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
371 ReplaceUses(SDValue(Node, I),
372 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
373 }
374
375 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
376 CurDAG->RemoveDeadNode(Node);
377}
378
379void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
380 SDLoc DL(Node);
381 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
382 MVT VT = Node->getSimpleValueType(0);
383 MVT XLenVT = Subtarget->getXLenVT();
384 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
385 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
386
387 unsigned CurOp = 2;
388 SmallVector<SDValue, 8> Operands;
389
390 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
391 Node->op_begin() + CurOp + NF);
392 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
393 Operands.push_back(MaskedOff);
394 CurOp += NF;
395
396 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
397 /*IsStridedOrIndexed*/ false, Operands,
398 /*IsLoad=*/true);
399
400 const RISCV::VLSEGPseudo *P =
401 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
402 Log2SEW, static_cast<unsigned>(LMUL));
403 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
404 XLenVT, MVT::Other, Operands);
405
406 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
407 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
408
409 SDValue SuperReg = SDValue(Load, 0);
410 for (unsigned I = 0; I < NF; ++I) {
411 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
412 ReplaceUses(SDValue(Node, I),
413 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
414 }
415
416 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
417 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
418 CurDAG->RemoveDeadNode(Node);
419}
420
421void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
422 bool IsOrdered) {
423 SDLoc DL(Node);
424 unsigned NF = Node->getNumValues() - 1;
425 MVT VT = Node->getSimpleValueType(0);
426 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
427 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
428
429 unsigned CurOp = 2;
430 SmallVector<SDValue, 8> Operands;
431
432 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
433 Node->op_begin() + CurOp + NF);
434 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
435 Operands.push_back(MaskedOff);
436 CurOp += NF;
437
438 MVT IndexVT;
439 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
440 /*IsStridedOrIndexed*/ true, Operands,
441 /*IsLoad=*/true, &IndexVT);
442
444 "Element count mismatch");
445
446 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 report_fatal_error("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Load =
456 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
457
458 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
459 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
460
461 SDValue SuperReg = SDValue(Load, 0);
462 for (unsigned I = 0; I < NF; ++I) {
463 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
464 ReplaceUses(SDValue(Node, I),
465 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
466 }
467
468 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
469 CurDAG->RemoveDeadNode(Node);
470}
471
472void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
473 bool IsStrided) {
474 SDLoc DL(Node);
475 unsigned NF = Node->getNumOperands() - 4;
476 if (IsStrided)
477 NF--;
478 if (IsMasked)
479 NF--;
480 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
481 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
482 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
483 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
484 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
485
486 SmallVector<SDValue, 8> Operands;
487 Operands.push_back(StoreVal);
488 unsigned CurOp = 2 + NF;
489
490 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
491 Operands);
492
493 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
494 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
495 MachineSDNode *Store =
496 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
497
498 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
499 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
500
501 ReplaceNode(Node, Store);
502}
503
504void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
505 bool IsOrdered) {
506 SDLoc DL(Node);
507 unsigned NF = Node->getNumOperands() - 5;
508 if (IsMasked)
509 --NF;
510 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
511 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
512 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
513 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
514 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
515
516 SmallVector<SDValue, 8> Operands;
517 Operands.push_back(StoreVal);
518 unsigned CurOp = 2 + NF;
519
520 MVT IndexVT;
521 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
522 /*IsStridedOrIndexed*/ true, Operands,
523 /*IsLoad=*/false, &IndexVT);
524
526 "Element count mismatch");
527
528 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
529 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
530 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
531 report_fatal_error("The V extension does not support EEW=64 for index "
532 "values when XLEN=32");
533 }
534 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
535 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
536 static_cast<unsigned>(IndexLMUL));
537 MachineSDNode *Store =
538 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
539
540 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
541 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
542
543 ReplaceNode(Node, Store);
544}
545
546void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
547 if (!Subtarget->hasVInstructions())
548 return;
549
550 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
551
552 SDLoc DL(Node);
553 MVT XLenVT = Subtarget->getXLenVT();
554
555 unsigned IntNo = Node->getConstantOperandVal(0);
556
557 assert((IntNo == Intrinsic::riscv_vsetvli ||
558 IntNo == Intrinsic::riscv_vsetvlimax) &&
559 "Unexpected vsetvli intrinsic");
560
561 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
562 unsigned Offset = (VLMax ? 1 : 2);
563
564 assert(Node->getNumOperands() == Offset + 2 &&
565 "Unexpected number of operands");
566
567 unsigned SEW =
568 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
569 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
570 Node->getConstantOperandVal(Offset + 1) & 0x7);
571
572 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
573 /*MaskAgnostic*/ true);
574 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
575
576 SDValue VLOperand;
577 unsigned Opcode = RISCV::PseudoVSETVLI;
578 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
579 if (auto VLEN = Subtarget->getRealVLen())
580 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
581 VLMax = true;
582 }
583 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
584 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
585 Opcode = RISCV::PseudoVSETVLIX0;
586 } else {
587 VLOperand = Node->getOperand(1);
588
589 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
590 uint64_t AVL = C->getZExtValue();
591 if (isUInt<5>(AVL)) {
592 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
593 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
594 XLenVT, VLImm, VTypeIOp));
595 return;
596 }
597 }
598 }
599
600 ReplaceNode(Node,
601 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
602}
603
604bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
605 MVT VT = Node->getSimpleValueType(0);
606 unsigned Opcode = Node->getOpcode();
607 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
608 "Unexpected opcode");
609 SDLoc DL(Node);
610
611 // For operations of the form (x << C1) op C2, check if we can use
612 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
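  // For example (illustrative): (or (shl X, 8), 0x4700) needs the immediate
  // 0x4700 in a register, but the reassociated (shl (or X, 0x47), 8) can use
  // ORI because 0x47 fits in a simm12.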
613 SDValue N0 = Node->getOperand(0);
614 SDValue N1 = Node->getOperand(1);
615
616 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
617 if (!Cst)
618 return false;
619
620 int64_t Val = Cst->getSExtValue();
621
622 // Check if immediate can already use ANDI/ORI/XORI.
623 if (isInt<12>(Val))
624 return false;
625
626 SDValue Shift = N0;
627
628 // If Val is simm32 and we have a sext_inreg from i32, then the binop
629 // produces at least 33 sign bits. We can peek through the sext_inreg and use
630 // a SLLIW at the end.
631 bool SignExt = false;
632 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
633 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
634 SignExt = true;
635 Shift = N0.getOperand(0);
636 }
637
638 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
639 return false;
640
641 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
642 if (!ShlCst)
643 return false;
644
645 uint64_t ShAmt = ShlCst->getZExtValue();
646
647 // Make sure that we don't change the operation by removing bits.
648 // This only matters for OR and XOR, AND is unaffected.
649 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
650 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
651 return false;
652
653 int64_t ShiftedVal = Val >> ShAmt;
654 if (!isInt<12>(ShiftedVal))
655 return false;
656
657 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
658 if (SignExt && ShAmt >= 32)
659 return false;
660
661 // Ok, we can reorder to get a smaller immediate.
662 unsigned BinOpc;
663 switch (Opcode) {
664 default: llvm_unreachable("Unexpected opcode");
665 case ISD::AND: BinOpc = RISCV::ANDI; break;
666 case ISD::OR: BinOpc = RISCV::ORI; break;
667 case ISD::XOR: BinOpc = RISCV::XORI; break;
668 }
669
670 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
671
672 SDNode *BinOp =
673 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
674 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
675 SDNode *SLLI =
676 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
677 CurDAG->getTargetConstant(ShAmt, DL, VT));
678 ReplaceNode(Node, SLLI);
679 return true;
680}
681
682bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
683 // Only supported with XTHeadBb at the moment.
684 if (!Subtarget->hasVendorXTHeadBb())
685 return false;
686
687 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
688 if (!N1C)
689 return false;
690
691 SDValue N0 = Node->getOperand(0);
692 if (!N0.hasOneUse())
693 return false;
694
695 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
696 MVT VT) {
697 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
698 CurDAG->getTargetConstant(Msb, DL, VT),
699 CurDAG->getTargetConstant(Lsb, DL, VT));
700 };
701
702 SDLoc DL(Node);
703 MVT VT = Node->getSimpleValueType(0);
704 const unsigned RightShAmt = N1C->getZExtValue();
705
706 // Transform (sra (shl X, C1) C2) with C1 < C2
707 // -> (TH.EXT X, msb, lsb)
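  // For example (illustrative): with XLEN=64, (sra (shl X, 48), 52) extracts
  // bits [15:4] of X with sign extension, i.e. TH.EXT X, 15, 4.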
708 if (N0.getOpcode() == ISD::SHL) {
709 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
710 if (!N01C)
711 return false;
712
713 const unsigned LeftShAmt = N01C->getZExtValue();
714 // Make sure that this is a bitfield extraction (i.e., the shift-right
715 // amount can not be less than the left-shift).
716 if (LeftShAmt > RightShAmt)
717 return false;
718
719 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
720 const unsigned Msb = MsbPlusOne - 1;
721 const unsigned Lsb = RightShAmt - LeftShAmt;
722
723 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724 ReplaceNode(Node, TH_EXT);
725 return true;
726 }
727
728 // Transform (sra (sext_inreg X, _), C) ->
729 // (TH.EXT X, msb, lsb)
730 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
731 unsigned ExtSize =
732 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
733
734 // ExtSize of 32 should use sraiw via tablegen pattern.
735 if (ExtSize == 32)
736 return false;
737
738 const unsigned Msb = ExtSize - 1;
739 const unsigned Lsb = RightShAmt;
740
741 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
742 ReplaceNode(Node, TH_EXT);
743 return true;
744 }
745
746 return false;
747}
748
749bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
750 // Target does not support indexed loads.
751 if (!Subtarget->hasVendorXTHeadMemIdx())
752 return false;
753
754 LoadSDNode *Ld = cast<LoadSDNode>(Node);
755 ISD::MemIndexedMode AM = Ld->getAddressingMode();
756 if (AM == ISD::UNINDEXED)
757 return false;
758
759 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
760 if (!C)
761 return false;
762
763 EVT LoadVT = Ld->getMemoryVT();
764 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
765 "Unexpected addressing mode");
766 bool IsPre = AM == ISD::PRE_INC;
767 bool IsPost = AM == ISD::POST_INC;
768 int64_t Offset = C->getSExtValue();
769
770 // The constants that can be encoded in the THeadMemIdx instructions
771 // are of the form (sign_extend(imm5) << imm2).
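  // For example (illustrative), an offset of 40 is encodable as 5 << 3
  // (imm5 = 5, imm2 = 3), whereas an offset of 33 is not.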
772 int64_t Shift;
773 for (Shift = 0; Shift < 4; Shift++)
774 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
775 break;
776
777 // Constant cannot be encoded.
778 if (Shift == 4)
779 return false;
780
781 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
782 unsigned Opcode;
783 if (LoadVT == MVT::i8 && IsPre)
784 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
785 else if (LoadVT == MVT::i8 && IsPost)
786 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
787 else if (LoadVT == MVT::i16 && IsPre)
788 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
789 else if (LoadVT == MVT::i16 && IsPost)
790 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
791 else if (LoadVT == MVT::i32 && IsPre)
792 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
793 else if (LoadVT == MVT::i32 && IsPost)
794 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
795 else if (LoadVT == MVT::i64 && IsPre)
796 Opcode = RISCV::TH_LDIB;
797 else if (LoadVT == MVT::i64 && IsPost)
798 Opcode = RISCV::TH_LDIA;
799 else
800 return false;
801
802 EVT Ty = Ld->getOffset().getValueType();
803 SDValue Ops[] = {Ld->getBasePtr(),
804 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
805 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
806 Ld->getChain()};
807 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
808 Ld->getValueType(1), MVT::Other, Ops);
809
810 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
811 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
812
813 ReplaceNode(Node, New);
814
815 return true;
816}
817
818void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
819 if (!Subtarget->hasVInstructions())
820 return;
821
822 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
823
824 SDLoc DL(Node);
825 unsigned IntNo = Node->getConstantOperandVal(1);
826
827 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
828 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
829 "Unexpected vsetvli intrinsic");
830
831 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
832 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
833 SDValue SEWOp =
834 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
835 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
836 Node->getOperand(4), Node->getOperand(5),
837 Node->getOperand(8), SEWOp,
838 Node->getOperand(0)};
839
840 unsigned Opcode;
841 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
842 switch (LMulSDNode->getSExtValue()) {
843 case 5:
844 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
845 : RISCV::PseudoVC_I_SE_MF8;
846 break;
847 case 6:
848 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
849 : RISCV::PseudoVC_I_SE_MF4;
850 break;
851 case 7:
852 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
853 : RISCV::PseudoVC_I_SE_MF2;
854 break;
855 case 0:
856 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
857 : RISCV::PseudoVC_I_SE_M1;
858 break;
859 case 1:
860 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
861 : RISCV::PseudoVC_I_SE_M2;
862 break;
863 case 2:
864 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
865 : RISCV::PseudoVC_I_SE_M4;
866 break;
867 case 3:
868 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
869 : RISCV::PseudoVC_I_SE_M8;
870 break;
871 }
872
873 ReplaceNode(Node, CurDAG->getMachineNode(
874 Opcode, DL, Node->getSimpleValueType(0), Operands));
875}
876
877void RISCVDAGToDAGISel::Select(SDNode *Node) {
878 // If we have a custom node, we have already selected.
879 if (Node->isMachineOpcode()) {
880 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
881 Node->setNodeId(-1);
882 return;
883 }
884
885 // Instruction Selection not handled by the auto-generated tablegen selection
886 // should be handled here.
887 unsigned Opcode = Node->getOpcode();
888 MVT XLenVT = Subtarget->getXLenVT();
889 SDLoc DL(Node);
890 MVT VT = Node->getSimpleValueType(0);
891
892 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
893
894 switch (Opcode) {
895 case ISD::Constant: {
896 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
897 auto *ConstNode = cast<ConstantSDNode>(Node);
898 if (ConstNode->isZero()) {
899 SDValue New =
900 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
901 ReplaceNode(Node, New.getNode());
902 return;
903 }
904 int64_t Imm = ConstNode->getSExtValue();
905 // If the upper XLen-16 bits are not used, try to convert this to a simm12
906 // by sign extending bit 15.
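    // For example (illustrative), 0xFFF0 becomes -16 when only the low 16
    // bits are observed, which a single ADDI can materialize.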
907 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
908 hasAllHUsers(Node))
909 Imm = SignExtend64<16>(Imm);
910 // If the upper 32-bits are not used try to convert this into a simm32 by
911 // sign extending bit 32.
912 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
913 Imm = SignExtend64<32>(Imm);
914
915 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
916 return;
917 }
918 case ISD::ConstantFP: {
919 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
920 auto [FPImm, NeedsFNeg] =
921 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
922 VT);
923 if (FPImm >= 0) {
924 unsigned Opc;
925 unsigned FNegOpc;
926 switch (VT.SimpleTy) {
927 default:
928 llvm_unreachable("Unexpected size");
929 case MVT::f16:
930 Opc = RISCV::FLI_H;
931 FNegOpc = RISCV::FSGNJN_H;
932 break;
933 case MVT::f32:
934 Opc = RISCV::FLI_S;
935 FNegOpc = RISCV::FSGNJN_S;
936 break;
937 case MVT::f64:
938 Opc = RISCV::FLI_D;
939 FNegOpc = RISCV::FSGNJN_D;
940 break;
941 }
942 SDNode *Res = CurDAG->getMachineNode(
943 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
944 if (NeedsFNeg)
945 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
946 SDValue(Res, 0));
947
948 ReplaceNode(Node, Res);
949 return;
950 }
951
952 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
953 SDValue Imm;
954 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
955 // create an integer immediate.
956 if (APF.isPosZero() || NegZeroF64)
957 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
958 else
959 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
960 *Subtarget);
961
962 bool HasZdinx = Subtarget->hasStdExtZdinx();
963 bool Is64Bit = Subtarget->is64Bit();
964 unsigned Opc;
965 switch (VT.SimpleTy) {
966 default:
967 llvm_unreachable("Unexpected size");
968 case MVT::bf16:
969 assert(Subtarget->hasStdExtZfbfmin());
970 Opc = RISCV::FMV_H_X;
971 break;
972 case MVT::f16:
973 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
974 break;
975 case MVT::f32:
976 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
977 break;
978 case MVT::f64:
979 // For RV32, we can't move from a GPR, we need to convert instead. This
980 // should only happen for +0.0 and -0.0.
981 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
982 if (Is64Bit)
983 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
984 else
985 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
986 break;
987 }
988
989 SDNode *Res;
990 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
991 Res = CurDAG->getMachineNode(
992 Opc, DL, VT, Imm,
993 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
994 else
995 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
996
997 // For f64 -0.0, we need to insert a fneg.d idiom.
998 if (NegZeroF64) {
999 Opc = RISCV::FSGNJN_D;
1000 if (HasZdinx)
1001 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1002 Res =
1003 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1004 }
1005
1006 ReplaceNode(Node, Res);
1007 return;
1008 }
1009 case RISCVISD::BuildPairF64: {
1010 if (!Subtarget->hasStdExtZdinx())
1011 break;
1012
1013 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1014
1015 SDValue Ops[] = {
1016 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1017 Node->getOperand(0),
1018 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1019 Node->getOperand(1),
1020 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1021
1022 SDNode *N =
1023 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1024 ReplaceNode(Node, N);
1025 return;
1026 }
1027 case RISCVISD::SplitF64: {
1028 if (Subtarget->hasStdExtZdinx()) {
1029 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1030
1031 if (!SDValue(Node, 0).use_empty()) {
1032 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1033 Node->getOperand(0));
1034 ReplaceUses(SDValue(Node, 0), Lo);
1035 }
1036
1037 if (!SDValue(Node, 1).use_empty()) {
1038 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1039 Node->getOperand(0));
1040 ReplaceUses(SDValue(Node, 1), Hi);
1041 }
1042
1043 CurDAG->RemoveDeadNode(Node);
1044 return;
1045 }
1046
1047 if (!Subtarget->hasStdExtZfa())
1048 break;
1049 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1050 "Unexpected subtarget");
1051
1052 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1053 if (!SDValue(Node, 0).use_empty()) {
1054 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1055 Node->getOperand(0));
1056 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1057 }
1058 if (!SDValue(Node, 1).use_empty()) {
1059 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1060 Node->getOperand(0));
1061 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1062 }
1063
1064 CurDAG->RemoveDeadNode(Node);
1065 return;
1066 }
1067 case ISD::SHL: {
1068 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1069 if (!N1C)
1070 break;
1071 SDValue N0 = Node->getOperand(0);
1072 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1073 !isa<ConstantSDNode>(N0.getOperand(1)))
1074 break;
1075 unsigned ShAmt = N1C->getZExtValue();
1076 uint64_t Mask = N0.getConstantOperandVal(1);
1077
1078 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1079 // 32 leading zeros and C3 trailing zeros.
1080 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1081 unsigned XLen = Subtarget->getXLen();
1082 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1083 unsigned TrailingZeros = llvm::countr_zero(Mask);
1084 if (TrailingZeros > 0 && LeadingZeros == 32) {
1085 SDNode *SRLIW = CurDAG->getMachineNode(
1086 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1087 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1088 SDNode *SLLI = CurDAG->getMachineNode(
1089 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1090 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1091 ReplaceNode(Node, SLLI);
1092 return;
1093 }
1094 }
1095 break;
1096 }
1097 case ISD::SRL: {
1098 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1099 if (!N1C)
1100 break;
1101 SDValue N0 = Node->getOperand(0);
1102 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1103 break;
1104 unsigned ShAmt = N1C->getZExtValue();
1105 uint64_t Mask = N0.getConstantOperandVal(1);
1106
1107 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1108 // 32 leading zeros and C3 trailing zeros.
1109 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1110 unsigned XLen = Subtarget->getXLen();
1111 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1112 unsigned TrailingZeros = llvm::countr_zero(Mask);
1113 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1114 SDNode *SRLIW = CurDAG->getMachineNode(
1115 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1116 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1117 SDNode *SLLI = CurDAG->getMachineNode(
1118 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1119 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1120 ReplaceNode(Node, SLLI);
1121 return;
1122 }
1123 }
1124
1125 // Optimize (srl (and X, C2), C) ->
1126 // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1127 // Where C2 is a mask with C3 trailing ones.
1128 // Taking into account that the C2 may have had lower bits unset by
1129 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1130 // This pattern occurs when type legalizing right shifts for types with
1131 // less than XLen bits.
1132 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1133 if (!isMask_64(Mask))
1134 break;
1135 unsigned TrailingOnes = llvm::countr_one(Mask);
1136 if (ShAmt >= TrailingOnes)
1137 break;
1138 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1139 if (TrailingOnes == 32) {
1140 SDNode *SRLI = CurDAG->getMachineNode(
1141 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1142 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1143 ReplaceNode(Node, SRLI);
1144 return;
1145 }
1146
1147 // Only do the remaining transforms if the AND has one use.
1148 if (!N0.hasOneUse())
1149 break;
1150
1151 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1152 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1153 SDNode *BEXTI = CurDAG->getMachineNode(
1154 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1155 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1156 ReplaceNode(Node, BEXTI);
1157 return;
1158 }
1159
1160 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1161 SDNode *SLLI =
1162 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1163 CurDAG->getTargetConstant(LShAmt, DL, VT));
1164 SDNode *SRLI = CurDAG->getMachineNode(
1165 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1166 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1167 ReplaceNode(Node, SRLI);
1168 return;
1169 }
1170 case ISD::SRA: {
1171 if (trySignedBitfieldExtract(Node))
1172 return;
1173
1174 // Optimize (sra (sext_inreg X, i16), C) ->
1175 // (srai (slli X, (XLen-16)), (XLen-16) + C)
1176 // And (sra (sext_inreg X, i8), C) ->
1177 // (srai (slli X, (XLen-8)), (XLen-8) + C)
1178 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1179 // This transform matches the code we get without Zbb. The shifts are more
1180 // compressible, and this can help expose CSE opportunities in the sdiv by
1181 // constant optimization.
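    // For example (illustrative): on RV64, (sra (sext_inreg X, i8), 2)
    // becomes (srai (slli X, 56), 58).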
1182 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1183 if (!N1C)
1184 break;
1185 SDValue N0 = Node->getOperand(0);
1186 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1187 break;
1188 unsigned ShAmt = N1C->getZExtValue();
1189 unsigned ExtSize =
1190 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1191 // ExtSize of 32 should use sraiw via tablegen pattern.
1192 if (ExtSize >= 32 || ShAmt >= ExtSize)
1193 break;
1194 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1195 SDNode *SLLI =
1196 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1197 CurDAG->getTargetConstant(LShAmt, DL, VT));
1198 SDNode *SRAI = CurDAG->getMachineNode(
1199 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1200 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1201 ReplaceNode(Node, SRAI);
1202 return;
1203 }
1204 case ISD::OR:
1205 case ISD::XOR:
1206 if (tryShrinkShlLogicImm(Node))
1207 return;
1208
1209 break;
1210 case ISD::AND: {
1211 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1212 if (!N1C)
1213 break;
1214 uint64_t C1 = N1C->getZExtValue();
1215 const bool isC1Mask = isMask_64(C1);
1216 const bool isC1ANDI = isInt<12>(C1);
1217
1218 SDValue N0 = Node->getOperand(0);
1219
1220 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1221 SDValue X, unsigned Msb,
1222 unsigned Lsb) {
1223 if (!Subtarget->hasVendorXTHeadBb())
1224 return false;
1225
1226 SDNode *TH_EXTU = CurDAG->getMachineNode(
1227 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1228 CurDAG->getTargetConstant(Lsb, DL, VT));
1229 ReplaceNode(Node, TH_EXTU);
1230 return true;
1231 };
1232
1233 bool LeftShift = N0.getOpcode() == ISD::SHL;
1234 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1235 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1236 if (!C)
1237 break;
1238 unsigned C2 = C->getZExtValue();
1239 unsigned XLen = Subtarget->getXLen();
1240 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1241
1242 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1243 // shift pair might offer more compression opportunities.
1244 // TODO: We could check for C extension here, but we don't have many lit
1245 // tests with the C extension enabled so not checking gets better
1246 // coverage.
1247 // TODO: What if ANDI faster than shift?
1248 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1249
1250 // Clear irrelevant bits in the mask.
1251 if (LeftShift)
1252 C1 &= maskTrailingZeros<uint64_t>(C2);
1253 else
1254 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1255
1256 // Some transforms should only be done if the shift has a single use or
1257 // the AND would become (srli (slli X, 32), 32)
1258 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1259
1260 SDValue X = N0.getOperand(0);
1261
1262 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1263 // with c3 leading zeros.
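      // For example (illustrative): on RV64, (and (srl x, 4), 0xFFF) has a
      // mask with c3 = 52 leading zeros, so the generic form is
      // (srli (slli x, 48), 52), extracting bits [15:4] of x.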
1264 if (!LeftShift && isC1Mask) {
1265 unsigned Leading = XLen - llvm::bit_width(C1);
1266 if (C2 < Leading) {
1267 // If the number of leading zeros is C2+32 this can be SRLIW.
1268 if (C2 + 32 == Leading) {
1269 SDNode *SRLIW = CurDAG->getMachineNode(
1270 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1271 ReplaceNode(Node, SRLIW);
1272 return;
1273 }
1274
1275 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1276 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1277 //
1278 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1279 // legalized and goes through DAG combine.
1280 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1281 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1282 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1283 SDNode *SRAIW =
1284 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1285 CurDAG->getTargetConstant(31, DL, VT));
1286 SDNode *SRLIW = CurDAG->getMachineNode(
1287 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1288 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1289 ReplaceNode(Node, SRLIW);
1290 return;
1291 }
1292
1293 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1294 // available.
1295 // Transform (and (srl x, C2), C1)
1296 // -> (<bfextract> x, msb, lsb)
1297 //
1298 // Make sure to keep this below the SRLIW cases, as we always want to
1299 // prefer the more common instruction.
1300 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1301 const unsigned Lsb = C2;
1302 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1303 return;
1304
1305 // (srli (slli x, c3-c2), c3).
1306 // Skip if we could use (zext.w (sraiw X, C2)).
1307 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1308 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1309 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1310 // Also Skip if we can use bexti or th.tst.
1311 Skip |= HasBitTest && Leading == XLen - 1;
1312 if (OneUseOrZExtW && !Skip) {
1313 SDNode *SLLI = CurDAG->getMachineNode(
1314 RISCV::SLLI, DL, VT, X,
1315 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1316 SDNode *SRLI = CurDAG->getMachineNode(
1317 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1318 CurDAG->getTargetConstant(Leading, DL, VT));
1319 ReplaceNode(Node, SRLI);
1320 return;
1321 }
1322 }
1323 }
1324
1325 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1326 // shifted by c2 bits with c3 leading zeros.
1327 if (LeftShift && isShiftedMask_64(C1)) {
1328 unsigned Leading = XLen - llvm::bit_width(C1);
1329
1330 if (C2 + Leading < XLen &&
1331 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1332 // Use slli.uw when possible.
1333 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1334 SDNode *SLLI_UW =
1335 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1336 CurDAG->getTargetConstant(C2, DL, VT));
1337 ReplaceNode(Node, SLLI_UW);
1338 return;
1339 }
1340
1341 // (srli (slli x, c2+c3), c3)
1342 if (OneUseOrZExtW && !IsCANDI) {
1343 SDNode *SLLI = CurDAG->getMachineNode(
1344 RISCV::SLLI, DL, VT, X,
1345 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1346 SDNode *SRLI = CurDAG->getMachineNode(
1347 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1348 CurDAG->getTargetConstant(Leading, DL, VT));
1349 ReplaceNode(Node, SRLI);
1350 return;
1351 }
1352 }
1353 }
1354
1355 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1356 // shifted mask with c2 leading zeros and c3 trailing zeros.
1357 if (!LeftShift && isShiftedMask_64(C1)) {
1358 unsigned Leading = XLen - llvm::bit_width(C1);
1359 unsigned Trailing = llvm::countr_zero(C1);
1360 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1361 !IsCANDI) {
1362 unsigned SrliOpc = RISCV::SRLI;
1363 // If the input is zexti32 we should use SRLIW.
1364 if (X.getOpcode() == ISD::AND &&
1365 isa<ConstantSDNode>(X.getOperand(1)) &&
1366 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1367 SrliOpc = RISCV::SRLIW;
1368 X = X.getOperand(0);
1369 }
1370 SDNode *SRLI = CurDAG->getMachineNode(
1371 SrliOpc, DL, VT, X,
1372 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1373 SDNode *SLLI = CurDAG->getMachineNode(
1374 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1375 CurDAG->getTargetConstant(Trailing, DL, VT));
1376 ReplaceNode(Node, SLLI);
1377 return;
1378 }
1379 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1380 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1381 OneUseOrZExtW && !IsCANDI) {
1382 SDNode *SRLIW = CurDAG->getMachineNode(
1383 RISCV::SRLIW, DL, VT, X,
1384 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1385 SDNode *SLLI = CurDAG->getMachineNode(
1386 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1387 CurDAG->getTargetConstant(Trailing, DL, VT));
1388 ReplaceNode(Node, SLLI);
1389 return;
1390 }
1391 }
1392
1393 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1394 // shifted mask with no leading zeros and c3 trailing zeros.
1395 if (LeftShift && isShiftedMask_64(C1)) {
1396 unsigned Leading = XLen - llvm::bit_width(C1);
1397 unsigned Trailing = llvm::countr_zero(C1);
1398 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1399 SDNode *SRLI = CurDAG->getMachineNode(
1400 RISCV::SRLI, DL, VT, X,
1401 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1402 SDNode *SLLI = CurDAG->getMachineNode(
1403 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1404 CurDAG->getTargetConstant(Trailing, DL, VT));
1405 ReplaceNode(Node, SLLI);
1406 return;
1407 }
1408 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1409 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1410 SDNode *SRLIW = CurDAG->getMachineNode(
1411 RISCV::SRLIW, DL, VT, X,
1412 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1413 SDNode *SLLI = CurDAG->getMachineNode(
1414 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1415 CurDAG->getTargetConstant(Trailing, DL, VT));
1416 ReplaceNode(Node, SLLI);
1417 return;
1418 }
1419 }
1420 }
1421
1422 // If C1 masks off the upper bits only (but can't be formed as an
1423 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1424 // available.
1425 // Transform (and x, C1)
1426 // -> (<bfextract> x, msb, lsb)
1427 if (isC1Mask && !isC1ANDI) {
1428 const unsigned Msb = llvm::bit_width(C1) - 1;
1429 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1430 return;
1431 }
1432
1433 if (tryShrinkShlLogicImm(Node))
1434 return;
1435
1436 break;
1437 }
1438 case ISD::MUL: {
1439 // Special case for calculating (mul (and X, C2), C1) where the full product
1440 // fits in XLen bits. We can shift X left by the number of leading zeros in
1441 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1442 // product has XLen trailing zeros, putting it in the output of MULHU. This
1443 // can avoid materializing a constant in a register for C2.
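    // For example (illustrative): on RV64, (mul (and X, 0xFFFFF), 3) can be
    // selected roughly as
    //   slli  t0, X, 44
    //   lui   t1, 0x300        // 3 << 20
    //   mulhu a0, t0, t1
    // because ((X & 0xFFFFF) << 44) * (3 << 20) leaves the product in the
    // upper XLen bits, avoiding materialization of the 0xFFFFF mask.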
1444
1445 // RHS should be a constant.
1446 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1447 if (!N1C || !N1C->hasOneUse())
1448 break;
1449
1450 // LHS should be an AND with constant.
1451 SDValue N0 = Node->getOperand(0);
1452 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1453 break;
1454
1455 uint64_t C2 = N0.getConstantOperandVal(1);
1456
1457 // Constant should be a mask.
1458 if (!isMask_64(C2))
1459 break;
1460
1461 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1462 // multiple users or the constant is a simm12. This prevents inserting a
1463 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1464 // make it more costly to materialize. Otherwise, using a SLLI might allow
1465 // it to be compressed.
1466 bool IsANDIOrZExt =
1467 isInt<12>(C2) ||
1468 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1469 // With XTHeadBb, we can use TH.EXTU.
1470 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1471 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1472 break;
1473 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1474 // the constant is a simm32.
1475 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1476 // With XTHeadBb, we can use TH.EXTU.
1477 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1478 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1479 break;
1480
1481 // We need to shift left the AND input and C1 by a total of XLen bits.
1482
1483 // How far left do we need to shift the AND input?
1484 unsigned XLen = Subtarget->getXLen();
1485 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1486
1487 // The constant gets shifted by the remaining amount unless that would
1488 // shift bits out.
1489 uint64_t C1 = N1C->getZExtValue();
1490 unsigned ConstantShift = XLen - LeadingZeros;
1491 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1492 break;
1493
1494 uint64_t ShiftedC1 = C1 << ConstantShift;
1495 // If this is RV32, we need to sign extend the constant.
1496 if (XLen == 32)
1497 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1498
1499 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1500 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1501 SDNode *SLLI =
1502 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1503 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1504 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1505 SDValue(SLLI, 0), SDValue(Imm, 0));
1506 ReplaceNode(Node, MULHU);
1507 return;
1508 }
1509 case ISD::LOAD: {
1510 if (tryIndexedLoad(Node))
1511 return;
1512 break;
1513 }
1514 case ISD::INTRINSIC_WO_CHAIN: {
1515 unsigned IntNo = Node->getConstantOperandVal(0);
1516 switch (IntNo) {
1517 // By default we do not custom select any intrinsic.
1518 default:
1519 break;
1520 case Intrinsic::riscv_vmsgeu:
1521 case Intrinsic::riscv_vmsge: {
1522 SDValue Src1 = Node->getOperand(1);
1523 SDValue Src2 = Node->getOperand(2);
1524 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1525 bool IsCmpUnsignedZero = false;
1526 // Only custom select scalar second operand.
1527 if (Src2.getValueType() != XLenVT)
1528 break;
1529 // Small constants are handled with patterns.
1530 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1531 int64_t CVal = C->getSExtValue();
1532 if (CVal >= -15 && CVal <= 16) {
1533 if (!IsUnsigned || CVal != 0)
1534 break;
1535 IsCmpUnsignedZero = true;
1536 }
1537 }
1538 MVT Src1VT = Src1.getSimpleValueType();
1539 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1540 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1541 default:
1542 llvm_unreachable("Unexpected LMUL!");
1543#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1544 case RISCVII::VLMUL::lmulenum: \
1545 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1546 : RISCV::PseudoVMSLT_VX_##suffix; \
1547 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1548 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1549 break;
1550 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1551 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1552 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1553 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1554 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1555 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1556 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1557#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1558 }
1559 SDValue SEW = CurDAG->getTargetConstant(
1560 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1561 SDValue VL;
1562 selectVLOp(Node->getOperand(3), VL);
1563
1564 // If vmsgeu with 0 immediate, expand it to vmset.
1565 if (IsCmpUnsignedZero) {
1566 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1567 return;
1568 }
1569
1570 // Expand to
1571 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
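      // There is no vmsge{u}.vx instruction in the vector ISA, so a >= x is
      // computed as !(a < x), e.g. (illustrative):
      //   vmsltu.vx v8, v4, a0
      //   vmnand.mm v8, v8, v8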
1572 SDValue Cmp = SDValue(
1573 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1574 0);
1575 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1576 {Cmp, Cmp, VL, SEW}));
1577 return;
1578 }
1579 case Intrinsic::riscv_vmsgeu_mask:
1580 case Intrinsic::riscv_vmsge_mask: {
1581 SDValue Src1 = Node->getOperand(2);
1582 SDValue Src2 = Node->getOperand(3);
1583 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1584 bool IsCmpUnsignedZero = false;
1585 // Only custom select scalar second operand.
1586 if (Src2.getValueType() != XLenVT)
1587 break;
1588 // Small constants are handled with patterns.
1589 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1590 int64_t CVal = C->getSExtValue();
1591 if (CVal >= -15 && CVal <= 16) {
1592 if (!IsUnsigned || CVal != 0)
1593 break;
1594 IsCmpUnsignedZero = true;
1595 }
1596 }
1597 MVT Src1VT = Src1.getSimpleValueType();
1598 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1599 VMOROpcode;
1600 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1601 default:
1602 llvm_unreachable("Unexpected LMUL!");
1603#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1604 case RISCVII::VLMUL::lmulenum: \
1605 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1606 : RISCV::PseudoVMSLT_VX_##suffix; \
1607 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1608 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1609 break;
1610 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1611 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1612 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1613 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1614 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1615 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1616 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1617#undef CASE_VMSLT_OPCODES
1618 }
1619 // Mask operations use the LMUL from the mask type.
1620 switch (RISCVTargetLowering::getLMUL(VT)) {
1621 default:
1622 llvm_unreachable("Unexpected LMUL!");
1623#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1624 case RISCVII::VLMUL::lmulenum: \
1625 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1626 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1627 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1628 break;
1629 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1630 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1631 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1632 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1633 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1634 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1635 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1636#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1637 }
1638 SDValue SEW = CurDAG->getTargetConstant(
1639 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1640 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1641 SDValue VL;
1642 selectVLOp(Node->getOperand(5), VL);
1643 SDValue MaskedOff = Node->getOperand(1);
1644 SDValue Mask = Node->getOperand(4);
1645
1646 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1647 if (IsCmpUnsignedZero) {
1648 // We don't need vmor if the MaskedOff and the Mask are the same
1649 // value.
1650 if (Mask == MaskedOff) {
1651 ReplaceUses(Node, Mask.getNode());
1652 return;
1653 }
1654 ReplaceNode(Node,
1655 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1656 {Mask, MaskedOff, VL, MaskSEW}));
1657 return;
1658 }
1659
1660 // If the MaskedOff value and the Mask are the same value use
1661 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1662 // This avoids needing to copy v0 to vd before starting the next sequence.
1663 if (Mask == MaskedOff) {
1664 SDValue Cmp = SDValue(
1665 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1666 0);
1667 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1668 {Mask, Cmp, VL, MaskSEW}));
1669 return;
1670 }
1671
1672 // Mask needs to be copied to V0.
1673 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1674 RISCV::V0, Mask, SDValue());
1675 SDValue Glue = Chain.getValue(1);
1676 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1677
1678 // Otherwise use
1679 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1680 // The result is mask undisturbed.
1681 // We use the same instructions to emulate mask agnostic behavior, because
1682 // the agnostic result can be either undisturbed or all 1.
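    // For example, in the active lanes the masked vmslt produces ~vmsge, and
    // xoring those lanes with the v0 bits (1 in active lanes) flips them back
    // to vmsge; inactive lanes hold MaskedOff and are xored with 0, so they
    // stay undisturbed.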
1683 SDValue Cmp = SDValue(
1684 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1685 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1686 0);
1687 // vmxor.mm vd, vd, v0 is used to update active value.
1688 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1689 {Cmp, Mask, VL, MaskSEW}));
1690 return;
1691 }
1692 case Intrinsic::riscv_vsetvli:
1693 case Intrinsic::riscv_vsetvlimax:
1694 return selectVSETVLI(Node);
1695 }
1696 break;
1697 }
1698  case ISD::INTRINSIC_W_CHAIN: {
1699    unsigned IntNo = Node->getConstantOperandVal(1);
1700 switch (IntNo) {
1701 // By default we do not custom select any intrinsic.
1702 default:
1703 break;
1704 case Intrinsic::riscv_vlseg2:
1705 case Intrinsic::riscv_vlseg3:
1706 case Intrinsic::riscv_vlseg4:
1707 case Intrinsic::riscv_vlseg5:
1708 case Intrinsic::riscv_vlseg6:
1709 case Intrinsic::riscv_vlseg7:
1710 case Intrinsic::riscv_vlseg8: {
1711 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1712 return;
1713 }
1714 case Intrinsic::riscv_vlseg2_mask:
1715 case Intrinsic::riscv_vlseg3_mask:
1716 case Intrinsic::riscv_vlseg4_mask:
1717 case Intrinsic::riscv_vlseg5_mask:
1718 case Intrinsic::riscv_vlseg6_mask:
1719 case Intrinsic::riscv_vlseg7_mask:
1720 case Intrinsic::riscv_vlseg8_mask: {
1721 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1722 return;
1723 }
1724 case Intrinsic::riscv_vlsseg2:
1725 case Intrinsic::riscv_vlsseg3:
1726 case Intrinsic::riscv_vlsseg4:
1727 case Intrinsic::riscv_vlsseg5:
1728 case Intrinsic::riscv_vlsseg6:
1729 case Intrinsic::riscv_vlsseg7:
1730 case Intrinsic::riscv_vlsseg8: {
1731 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1732 return;
1733 }
1734 case Intrinsic::riscv_vlsseg2_mask:
1735 case Intrinsic::riscv_vlsseg3_mask:
1736 case Intrinsic::riscv_vlsseg4_mask:
1737 case Intrinsic::riscv_vlsseg5_mask:
1738 case Intrinsic::riscv_vlsseg6_mask:
1739 case Intrinsic::riscv_vlsseg7_mask:
1740 case Intrinsic::riscv_vlsseg8_mask: {
1741 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1742 return;
1743 }
1744 case Intrinsic::riscv_vloxseg2:
1745 case Intrinsic::riscv_vloxseg3:
1746 case Intrinsic::riscv_vloxseg4:
1747 case Intrinsic::riscv_vloxseg5:
1748 case Intrinsic::riscv_vloxseg6:
1749 case Intrinsic::riscv_vloxseg7:
1750 case Intrinsic::riscv_vloxseg8:
1751 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1752 return;
1753 case Intrinsic::riscv_vluxseg2:
1754 case Intrinsic::riscv_vluxseg3:
1755 case Intrinsic::riscv_vluxseg4:
1756 case Intrinsic::riscv_vluxseg5:
1757 case Intrinsic::riscv_vluxseg6:
1758 case Intrinsic::riscv_vluxseg7:
1759 case Intrinsic::riscv_vluxseg8:
1760 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1761 return;
1762 case Intrinsic::riscv_vloxseg2_mask:
1763 case Intrinsic::riscv_vloxseg3_mask:
1764 case Intrinsic::riscv_vloxseg4_mask:
1765 case Intrinsic::riscv_vloxseg5_mask:
1766 case Intrinsic::riscv_vloxseg6_mask:
1767 case Intrinsic::riscv_vloxseg7_mask:
1768 case Intrinsic::riscv_vloxseg8_mask:
1769 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1770 return;
1771 case Intrinsic::riscv_vluxseg2_mask:
1772 case Intrinsic::riscv_vluxseg3_mask:
1773 case Intrinsic::riscv_vluxseg4_mask:
1774 case Intrinsic::riscv_vluxseg5_mask:
1775 case Intrinsic::riscv_vluxseg6_mask:
1776 case Intrinsic::riscv_vluxseg7_mask:
1777 case Intrinsic::riscv_vluxseg8_mask:
1778 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1779 return;
1780 case Intrinsic::riscv_vlseg8ff:
1781 case Intrinsic::riscv_vlseg7ff:
1782 case Intrinsic::riscv_vlseg6ff:
1783 case Intrinsic::riscv_vlseg5ff:
1784 case Intrinsic::riscv_vlseg4ff:
1785 case Intrinsic::riscv_vlseg3ff:
1786 case Intrinsic::riscv_vlseg2ff: {
1787 selectVLSEGFF(Node, /*IsMasked*/ false);
1788 return;
1789 }
1790 case Intrinsic::riscv_vlseg8ff_mask:
1791 case Intrinsic::riscv_vlseg7ff_mask:
1792 case Intrinsic::riscv_vlseg6ff_mask:
1793 case Intrinsic::riscv_vlseg5ff_mask:
1794 case Intrinsic::riscv_vlseg4ff_mask:
1795 case Intrinsic::riscv_vlseg3ff_mask:
1796 case Intrinsic::riscv_vlseg2ff_mask: {
1797 selectVLSEGFF(Node, /*IsMasked*/ true);
1798 return;
1799 }
1800 case Intrinsic::riscv_vloxei:
1801 case Intrinsic::riscv_vloxei_mask:
1802 case Intrinsic::riscv_vluxei:
1803 case Intrinsic::riscv_vluxei_mask: {
1804 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1805 IntNo == Intrinsic::riscv_vluxei_mask;
1806 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1807 IntNo == Intrinsic::riscv_vloxei_mask;
1808
1809 MVT VT = Node->getSimpleValueType(0);
1810 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1811
1812 unsigned CurOp = 2;
1813    SmallVector<SDValue, 8> Operands;
1814    Operands.push_back(Node->getOperand(CurOp++));
1815
1816 MVT IndexVT;
1817 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1818 /*IsStridedOrIndexed*/ true, Operands,
1819 /*IsLoad=*/true, &IndexVT);
1820
1822 "Element count mismatch");
1823
1824    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1825    RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1826 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1827 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1828 report_fatal_error("The V extension does not support EEW=64 for index "
1829 "values when XLEN=32");
1830 }
1831 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1832 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1833 static_cast<unsigned>(IndexLMUL));
1834 MachineSDNode *Load =
1835 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1836
1837 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1838 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1839
1840 ReplaceNode(Node, Load);
1841 return;
1842 }
1843 case Intrinsic::riscv_vlm:
1844 case Intrinsic::riscv_vle:
1845 case Intrinsic::riscv_vle_mask:
1846 case Intrinsic::riscv_vlse:
1847 case Intrinsic::riscv_vlse_mask: {
1848 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1849 IntNo == Intrinsic::riscv_vlse_mask;
1850 bool IsStrided =
1851 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1852
1853 MVT VT = Node->getSimpleValueType(0);
1854 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1855
1856    // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1857    // operand at the IR level. In pseudos, it has both a policy and a
1858    // passthru operand. The passthru operand is needed to track the
1859    // "tail undefined" state, and the policy is there just for
1860    // consistency - it will always be "don't care" for the
1861    // unmasked form.
1862 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1863 unsigned CurOp = 2;
1864    SmallVector<SDValue, 8> Operands;
1865    if (HasPassthruOperand)
1866 Operands.push_back(Node->getOperand(CurOp++));
1867 else {
1868 // We eagerly lower to implicit_def (instead of undef), as we
1869 // otherwise fail to select nodes such as: nxv1i1 = undef
1870 SDNode *Passthru =
1871 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1872 Operands.push_back(SDValue(Passthru, 0));
1873 }
1874 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1875 Operands, /*IsLoad=*/true);
1876
1877    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1878    const RISCV::VLEPseudo *P =
1879 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1880 static_cast<unsigned>(LMUL));
1881 MachineSDNode *Load =
1882 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1883
1884 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1885 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1886
1887 ReplaceNode(Node, Load);
1888 return;
1889 }
1890 case Intrinsic::riscv_vleff:
1891 case Intrinsic::riscv_vleff_mask: {
1892 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1893
1894 MVT VT = Node->getSimpleValueType(0);
1895 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1896
1897 unsigned CurOp = 2;
1898    SmallVector<SDValue, 8> Operands;
1899    Operands.push_back(Node->getOperand(CurOp++));
1900 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1901 /*IsStridedOrIndexed*/ false, Operands,
1902 /*IsLoad=*/true);
1903
1904    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1905    const RISCV::VLEPseudo *P =
1906 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1907 Log2SEW, static_cast<unsigned>(LMUL));
1908    MachineSDNode *Load = CurDAG->getMachineNode(
1909        P->Pseudo, DL, Node->getVTList(), Operands);
1910 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1911 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1912
1913 ReplaceNode(Node, Load);
1914 return;
1915 }
1916 }
1917 break;
1918 }
1919 case ISD::INTRINSIC_VOID: {
1920 unsigned IntNo = Node->getConstantOperandVal(1);
1921 switch (IntNo) {
1922 case Intrinsic::riscv_vsseg2:
1923 case Intrinsic::riscv_vsseg3:
1924 case Intrinsic::riscv_vsseg4:
1925 case Intrinsic::riscv_vsseg5:
1926 case Intrinsic::riscv_vsseg6:
1927 case Intrinsic::riscv_vsseg7:
1928 case Intrinsic::riscv_vsseg8: {
1929 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1930 return;
1931 }
1932 case Intrinsic::riscv_vsseg2_mask:
1933 case Intrinsic::riscv_vsseg3_mask:
1934 case Intrinsic::riscv_vsseg4_mask:
1935 case Intrinsic::riscv_vsseg5_mask:
1936 case Intrinsic::riscv_vsseg6_mask:
1937 case Intrinsic::riscv_vsseg7_mask:
1938 case Intrinsic::riscv_vsseg8_mask: {
1939 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1940 return;
1941 }
1942 case Intrinsic::riscv_vssseg2:
1943 case Intrinsic::riscv_vssseg3:
1944 case Intrinsic::riscv_vssseg4:
1945 case Intrinsic::riscv_vssseg5:
1946 case Intrinsic::riscv_vssseg6:
1947 case Intrinsic::riscv_vssseg7:
1948 case Intrinsic::riscv_vssseg8: {
1949 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1950 return;
1951 }
1952 case Intrinsic::riscv_vssseg2_mask:
1953 case Intrinsic::riscv_vssseg3_mask:
1954 case Intrinsic::riscv_vssseg4_mask:
1955 case Intrinsic::riscv_vssseg5_mask:
1956 case Intrinsic::riscv_vssseg6_mask:
1957 case Intrinsic::riscv_vssseg7_mask:
1958 case Intrinsic::riscv_vssseg8_mask: {
1959 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1960 return;
1961 }
1962 case Intrinsic::riscv_vsoxseg2:
1963 case Intrinsic::riscv_vsoxseg3:
1964 case Intrinsic::riscv_vsoxseg4:
1965 case Intrinsic::riscv_vsoxseg5:
1966 case Intrinsic::riscv_vsoxseg6:
1967 case Intrinsic::riscv_vsoxseg7:
1968 case Intrinsic::riscv_vsoxseg8:
1969 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1970 return;
1971 case Intrinsic::riscv_vsuxseg2:
1972 case Intrinsic::riscv_vsuxseg3:
1973 case Intrinsic::riscv_vsuxseg4:
1974 case Intrinsic::riscv_vsuxseg5:
1975 case Intrinsic::riscv_vsuxseg6:
1976 case Intrinsic::riscv_vsuxseg7:
1977 case Intrinsic::riscv_vsuxseg8:
1978 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1979 return;
1980 case Intrinsic::riscv_vsoxseg2_mask:
1981 case Intrinsic::riscv_vsoxseg3_mask:
1982 case Intrinsic::riscv_vsoxseg4_mask:
1983 case Intrinsic::riscv_vsoxseg5_mask:
1984 case Intrinsic::riscv_vsoxseg6_mask:
1985 case Intrinsic::riscv_vsoxseg7_mask:
1986 case Intrinsic::riscv_vsoxseg8_mask:
1987 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1988 return;
1989 case Intrinsic::riscv_vsuxseg2_mask:
1990 case Intrinsic::riscv_vsuxseg3_mask:
1991 case Intrinsic::riscv_vsuxseg4_mask:
1992 case Intrinsic::riscv_vsuxseg5_mask:
1993 case Intrinsic::riscv_vsuxseg6_mask:
1994 case Intrinsic::riscv_vsuxseg7_mask:
1995 case Intrinsic::riscv_vsuxseg8_mask:
1996 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1997 return;
1998 case Intrinsic::riscv_vsoxei:
1999 case Intrinsic::riscv_vsoxei_mask:
2000 case Intrinsic::riscv_vsuxei:
2001 case Intrinsic::riscv_vsuxei_mask: {
2002 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2003 IntNo == Intrinsic::riscv_vsuxei_mask;
2004 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2005 IntNo == Intrinsic::riscv_vsoxei_mask;
2006
2007 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2008 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2009
2010 unsigned CurOp = 2;
2011    SmallVector<SDValue, 8> Operands;
2012    Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2013
2014 MVT IndexVT;
2015 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2016 /*IsStridedOrIndexed*/ true, Operands,
2017 /*IsLoad=*/false, &IndexVT);
2018
2020 "Element count mismatch");
2021
2022    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2023    RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2024 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2025 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2026 report_fatal_error("The V extension does not support EEW=64 for index "
2027 "values when XLEN=32");
2028 }
2029 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2030 IsMasked, IsOrdered, IndexLog2EEW,
2031 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2032 MachineSDNode *Store =
2033 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2034
2035 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2036 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2037
2038 ReplaceNode(Node, Store);
2039 return;
2040 }
2041 case Intrinsic::riscv_vsm:
2042 case Intrinsic::riscv_vse:
2043 case Intrinsic::riscv_vse_mask:
2044 case Intrinsic::riscv_vsse:
2045 case Intrinsic::riscv_vsse_mask: {
2046 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2047 IntNo == Intrinsic::riscv_vsse_mask;
2048 bool IsStrided =
2049 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2050
2051 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2052 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2053
2054 unsigned CurOp = 2;
2055    SmallVector<SDValue, 8> Operands;
2056    Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2057
2058 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2059 Operands);
2060
2061    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2062    const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2063 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2064 MachineSDNode *Store =
2065 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2066 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2067 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2068
2069 ReplaceNode(Node, Store);
2070 return;
2071 }
2072 case Intrinsic::riscv_sf_vc_x_se:
2073 case Intrinsic::riscv_sf_vc_i_se:
2074 selectSF_VC_X_SE(Node);
2075 return;
2076 }
2077 break;
2078 }
2079 case ISD::BITCAST: {
2080 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2081 // Just drop bitcasts between vectors if both are fixed or both are
2082 // scalable.
2083 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2084 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2085 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2086 CurDAG->RemoveDeadNode(Node);
2087 return;
2088 }
2089 break;
2090 }
2091 case ISD::INSERT_SUBVECTOR: {
2092 SDValue V = Node->getOperand(0);
2093 SDValue SubV = Node->getOperand(1);
2094 SDLoc DL(SubV);
2095 auto Idx = Node->getConstantOperandVal(2);
2096 MVT SubVecVT = SubV.getSimpleValueType();
2097
2098 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2099 MVT SubVecContainerVT = SubVecVT;
2100 // Establish the correct scalable-vector types for any fixed-length type.
2101 if (SubVecVT.isFixedLengthVector()) {
2102 assert(Idx == 0 && V.isUndef());
2103 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2104 }
2105 MVT ContainerVT = VT;
2106 if (VT.isFixedLengthVector())
2107 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2108
2109 const auto *TRI = Subtarget->getRegisterInfo();
2110 unsigned SubRegIdx;
2111 std::tie(SubRegIdx, Idx) =
2112        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2113            ContainerVT, SubVecContainerVT, Idx, TRI);
2114
2115 // If the Idx hasn't been completely eliminated then this is a subvector
2116 // insert which doesn't naturally align to a vector register. These must
2117 // be handled using instructions to manipulate the vector registers.
2118 if (Idx != 0)
2119 break;
2120
2121 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2122 [[maybe_unused]] bool IsSubVecPartReg =
2123 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2124 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2125 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2126 assert((!IsSubVecPartReg || V.isUndef()) &&
2127 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2128 "the subvector is smaller than a full-sized register");
2129
2130 // If we haven't set a SubRegIdx, then we must be going between
2131 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2132 if (SubRegIdx == RISCV::NoSubRegister) {
2133 unsigned InRegClassID =
2134          RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT);
2135      assert(RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT) ==
2136                 InRegClassID &&
2137 "Unexpected subvector extraction");
2138 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2139 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2140 DL, VT, SubV, RC);
2141 ReplaceNode(Node, NewNode);
2142 return;
2143 }
2144
2145 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2146 ReplaceNode(Node, Insert.getNode());
2147 return;
2148 }
2149  case ISD::EXTRACT_SUBVECTOR: {
2150    SDValue V = Node->getOperand(0);
2151 auto Idx = Node->getConstantOperandVal(1);
2152 MVT InVT = V.getSimpleValueType();
2153 SDLoc DL(V);
2154
2155 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2156 MVT SubVecContainerVT = VT;
2157 // Establish the correct scalable-vector types for any fixed-length type.
2158 if (VT.isFixedLengthVector()) {
2159 assert(Idx == 0);
2160 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2161 }
2162 if (InVT.isFixedLengthVector())
2163 InVT = TLI.getContainerForFixedLengthVector(InVT);
2164
2165 const auto *TRI = Subtarget->getRegisterInfo();
2166 unsigned SubRegIdx;
2167 std::tie(SubRegIdx, Idx) =
2168        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2169            InVT, SubVecContainerVT, Idx, TRI);
2170
2171 // If the Idx hasn't been completely eliminated then this is a subvector
2172 // extract which doesn't naturally align to a vector register. These must
2173 // be handled using instructions to manipulate the vector registers.
2174 if (Idx != 0)
2175 break;
2176
2177 // If we haven't set a SubRegIdx, then we must be going between
2178 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2179 if (SubRegIdx == RISCV::NoSubRegister) {
2180 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2181      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2182                 InRegClassID &&
2183 "Unexpected subvector extraction");
2184 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2185 SDNode *NewNode =
2186 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2187 ReplaceNode(Node, NewNode);
2188 return;
2189 }
2190
2191 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2192 ReplaceNode(Node, Extract.getNode());
2193 return;
2194 }
2195  case RISCVISD::VMV_S_X_VL:
2196  case RISCVISD::VFMV_S_F_VL:
2197  case RISCVISD::VMV_V_X_VL:
2198  case RISCVISD::VFMV_V_F_VL: {
2199 // Try to match splat of a scalar load to a strided load with stride of x0.
2200 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2201 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2202 if (!Node->getOperand(0).isUndef())
2203 break;
2204 SDValue Src = Node->getOperand(1);
2205 auto *Ld = dyn_cast<LoadSDNode>(Src);
2206    // Can't fold an indexed (pre/post-increment) load: its second output
2207    // (the updated address) has uses, so the load node can't be removed.
2208 if (!Ld || Ld->isIndexed())
2209 break;
2210 EVT MemVT = Ld->getMemoryVT();
2211 // The memory VT should be the same size as the element type.
2212 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2213 break;
2214 if (!IsProfitableToFold(Src, Node, Node) ||
2215 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2216 break;
2217
2218 SDValue VL;
2219 if (IsScalarMove) {
2220 // We could deal with more VL if we update the VSETVLI insert pass to
2221 // avoid introducing more VSETVLI.
2222 if (!isOneConstant(Node->getOperand(2)))
2223 break;
2224 selectVLOp(Node->getOperand(2), VL);
2225 } else
2226 selectVLOp(Node->getOperand(2), VL);
2227
2228 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2229 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2230
2231 // If VL=1, then we don't need to do a strided load and can just do a
2232 // regular load.
2233 bool IsStrided = !isOneConstant(VL);
2234
2235 // Only do a strided load if we have optimized zero-stride vector load.
2236 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2237 break;
2238
2239    SmallVector<SDValue> Operands = {
2240        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2241 Ld->getBasePtr()};
2242 if (IsStrided)
2243 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2245 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2246 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2247
2248    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2249    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2250 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2251 Log2SEW, static_cast<unsigned>(LMUL));
2252 MachineSDNode *Load =
2253 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2254 // Update the chain.
2255 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2256 // Record the mem-refs
2257 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2258 // Replace the splat with the vlse.
2259 ReplaceNode(Node, Load);
2260 return;
2261 }
2262 case ISD::PREFETCH:
2263 unsigned Locality = Node->getConstantOperandVal(3);
2264 if (Locality > 2)
2265 break;
2266
2267 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2268 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2269      MMO->setFlags(MachineMemOperand::MONonTemporal);
2270
2271 int NontemporalLevel = 0;
2272 switch (Locality) {
2273 case 0:
2274 NontemporalLevel = 3; // NTL.ALL
2275 break;
2276 case 1:
2277 NontemporalLevel = 1; // NTL.PALL
2278 break;
2279 case 2:
2280 NontemporalLevel = 0; // NTL.P1
2281 break;
2282 default:
2283 llvm_unreachable("unexpected locality value.");
2284 }
2285
2286      if (NontemporalLevel & 0b1)
2287        MMO->setFlags(MONontemporalBit0);
2288      if (NontemporalLevel & 0b10)
2289        MMO->setFlags(MONontemporalBit1);
2290 }
2291 break;
2292 }
2293
2294 // Select the default instruction.
2295 SelectCode(Node);
2296}
2297
2298bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2299    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2300 std::vector<SDValue> &OutOps) {
2301 // Always produce a register and immediate operand, as expected by
2302 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2303 switch (ConstraintID) {
2304  case InlineAsm::ConstraintCode::o:
2305  case InlineAsm::ConstraintCode::m: {
2306    SDValue Op0, Op1;
2307 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2308 assert(Found && "SelectAddrRegImm should always succeed");
2309 OutOps.push_back(Op0);
2310 OutOps.push_back(Op1);
2311 return false;
2312 }
2313  case InlineAsm::ConstraintCode::A:
2314    OutOps.push_back(Op);
2315 OutOps.push_back(
2316 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2317 return false;
2318 default:
2319 report_fatal_error("Unexpected asm memory constraint " +
2320 InlineAsm::getMemConstraintName(ConstraintID));
2321 }
2322
2323 return true;
2324}
2325
2326bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2327                                             SDValue &Offset) {
2328 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2329 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2330 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2331 return true;
2332 }
2333
2334 return false;
2335}
2336
2337// Select a frame index and an optional immediate offset from an ADD or OR.
2338bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2339                                              SDValue &Offset) {
2340  if (SelectAddrFrameIndex(Addr, Base, Offset))
2341    return true;
2342
2343  if (!CurDAG->isBaseWithConstantOffset(Addr))
2344    return false;
2345
2346 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2347 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2348 if (isInt<12>(CVal)) {
2349 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2350 Subtarget->getXLenVT());
2351      Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2352                                         Subtarget->getXLenVT());
2353 return true;
2354 }
2355 }
2356
2357 return false;
2358}
2359
2360// Fold constant addresses.
2361static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2362 const MVT VT, const RISCVSubtarget *Subtarget,
2363                               SDValue Addr, SDValue &Base, SDValue &Offset,
2364                               bool IsPrefetch = false) {
2365 if (!isa<ConstantSDNode>(Addr))
2366 return false;
2367
2368 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2369
2370 // If the constant is a simm12, we can fold the whole constant and use X0 as
2371 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2372 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
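  // For example, CVal = 0x12345678 splits into Hi20 = 0x12345 and Lo12 = 0x678,
  // so the base becomes "LUI 0x12345" and 0x678 is folded into the memory
  // instruction's offset field.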
2373 int64_t Lo12 = SignExtend64<12>(CVal);
2374 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2375 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2376 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2377 return false;
2378
2379 if (Hi) {
2380 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2381 Base = SDValue(
2382 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2383 CurDAG->getTargetConstant(Hi20, DL, VT)),
2384 0);
2385 } else {
2386 Base = CurDAG->getRegister(RISCV::X0, VT);
2387 }
2388 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2389 return true;
2390 }
2391
2392 // Ask how constant materialization would handle this constant.
2393 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2394
2395 // If the last instruction would be an ADDI, we can fold its immediate and
2396 // emit the rest of the sequence as the base.
2397 if (Seq.back().getOpcode() != RISCV::ADDI)
2398 return false;
2399 Lo12 = Seq.back().getImm();
2400 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2401 return false;
2402
2403 // Drop the last instruction.
2404 Seq.pop_back();
2405 assert(!Seq.empty() && "Expected more instructions in sequence");
2406
2407 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2408 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2409 return true;
2410}
2411
2412// Is this ADD instruction only used as the base pointer of scalar loads and
2413// stores?
2414static bool isWorthFoldingAdd(SDValue Add) {
2415  for (auto *Use : Add->uses()) {
2416 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2417 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2418 Use->getOpcode() != ISD::ATOMIC_STORE)
2419 return false;
2420 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2421 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2422 VT != MVT::f64)
2423 return false;
2424 // Don't allow stores of the value. It must be used as the address.
2425 if (Use->getOpcode() == ISD::STORE &&
2426 cast<StoreSDNode>(Use)->getValue() == Add)
2427 return false;
2428 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2429 cast<AtomicSDNode>(Use)->getVal() == Add)
2430 return false;
2431 }
2432
2433 return true;
2434}
2435
2436bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2437                                              unsigned MaxShiftAmount,
2438                                              SDValue &Base, SDValue &Index,
2439                                              SDValue &Scale) {
2440 EVT VT = Addr.getSimpleValueType();
2441 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2442 SDValue &Shift) {
2443 uint64_t ShiftAmt = 0;
2444 Index = N;
2445
2446 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2447 // Only match shifts by a value in range [0, MaxShiftAmount].
2448 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2449 Index = N.getOperand(0);
2450 ShiftAmt = N.getConstantOperandVal(1);
2451 }
2452 }
2453
2454 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2455 return ShiftAmt != 0;
2456 };
2457
2458 if (Addr.getOpcode() == ISD::ADD) {
2459 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2460 SDValue AddrB = Addr.getOperand(0);
2461 if (AddrB.getOpcode() == ISD::ADD &&
2462 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2463 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2464 isInt<12>(C1->getSExtValue())) {
2465 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2466 SDValue C1Val =
2467 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2468 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2469 AddrB.getOperand(1), C1Val),
2470 0);
2471 return true;
2472 }
2473 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2474 Base = Addr.getOperand(1);
2475 return true;
2476 } else {
2477 UnwrapShl(Addr.getOperand(1), Index, Scale);
2478 Base = Addr.getOperand(0);
2479 return true;
2480 }
2481 } else if (UnwrapShl(Addr, Index, Scale)) {
2482 EVT VT = Addr.getValueType();
2483 Base = CurDAG->getRegister(RISCV::X0, VT);
2484 return true;
2485 }
2486
2487 return false;
2488}
2489
2490bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2491                                         SDValue &Offset, bool IsINX) {
2492  if (SelectAddrFrameIndex(Addr, Base, Offset))
2493    return true;
2494
2495 SDLoc DL(Addr);
2496 MVT VT = Addr.getSimpleValueType();
2497
2498 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2499 Base = Addr.getOperand(0);
2500 Offset = Addr.getOperand(1);
2501 return true;
2502 }
2503
2504 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2505  if (CurDAG->isBaseWithConstantOffset(Addr)) {
2506    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2507 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2508 Base = Addr.getOperand(0);
2509 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2510 SDValue LoOperand = Base.getOperand(1);
2511 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2512 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2513 // (its low part, really), then we can rely on the alignment of that
2514 // variable to provide a margin of safety before low part can overflow
2515 // the 12 bits of the load/store offset. Check if CVal falls within
2516 // that margin; if so (low part + CVal) can't overflow.
2517 const DataLayout &DL = CurDAG->getDataLayout();
2518 Align Alignment = commonAlignment(
2519 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2520 if (CVal == 0 || Alignment > CVal) {
2521 int64_t CombinedOffset = CVal + GA->getOffset();
2522 Base = Base.getOperand(0);
2523          Offset = CurDAG->getTargetGlobalAddress(
2524              GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2525 CombinedOffset, GA->getTargetFlags());
2526 return true;
2527 }
2528 }
2529 }
2530
2531 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2532 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2533 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2534 return true;
2535 }
2536 }
2537
2538 // Handle ADD with large immediates.
2539 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2540 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2541 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2542 "simm12 not already handled?");
2543
2544 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2545 // an ADDI for part of the offset and fold the rest into the load/store.
2546 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
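    // For example, an offset of 3000 becomes "ADDI tmp, base, 2047" with the
    // remaining 953 folded into the load/store immediate.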
2547 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2548 int64_t Adj = CVal < 0 ? -2048 : 2047;
2549 Base = SDValue(
2550 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2551 CurDAG->getTargetConstant(Adj, DL, VT)),
2552 0);
2553 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2554 return true;
2555 }
2556
2557 // For larger immediates, we might be able to save one instruction from
2558 // constant materialization by folding the Lo12 bits of the immediate into
2559 // the address. We should only do this if the ADD is only used by loads and
2560 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2561 // separately with the full materialized immediate creating extra
2562 // instructions.
2563 if (isWorthFoldingAdd(Addr) &&
2564 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2565 Offset)) {
2566 // Insert an ADD instruction with the materialized Hi52 bits.
2567 Base = SDValue(
2568 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2569 0);
2570 return true;
2571 }
2572 }
2573
2574 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2575 return true;
2576
2577 Base = Addr;
2578 Offset = CurDAG->getTargetConstant(0, DL, VT);
2579 return true;
2580}
2581
2582/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2583/// Offset should be all zeros.
2584bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2585                                                 SDValue &Offset) {
2586  if (SelectAddrFrameIndex(Addr, Base, Offset))
2587    return true;
2588
2589 SDLoc DL(Addr);
2590 MVT VT = Addr.getSimpleValueType();
2591
2593 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2594 if (isInt<12>(CVal)) {
2595 Base = Addr.getOperand(0);
2596
2597 // Early-out if not a valid offset.
2598 if ((CVal & 0b11111) != 0) {
2599 Base = Addr;
2600 Offset = CurDAG->getTargetConstant(0, DL, VT);
2601 return true;
2602 }
2603
2604 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2605 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2606 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2607 return true;
2608 }
2609 }
2610
2611 // Handle ADD with large immediates.
2612 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2613 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2614    assert(!isInt<12>(CVal) &&
2615 "simm12 not already handled?");
2616
2617 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2618    // one instruction by folding an adjustment of -2048 or 2016 into the address.
2619 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2620 int64_t Adj = CVal < 0 ? -2048 : 2016;
2621 int64_t AdjustedOffset = CVal - Adj;
2622      Base = SDValue(CurDAG->getMachineNode(
2623                         RISCV::ADDI, DL, VT, Addr.getOperand(0),
2624 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2625 0);
2626 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2627 return true;
2628 }
2629
2630 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2631 Offset, true)) {
2632 // Insert an ADD instruction with the materialized Hi52 bits.
2633 Base = SDValue(
2634 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2635 0);
2636 return true;
2637 }
2638 }
2639
2640 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2641 return true;
2642
2643 Base = Addr;
2644 Offset = CurDAG->getTargetConstant(0, DL, VT);
2645 return true;
2646}
2647
2648bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2649                                        SDValue &ShAmt) {
2650 ShAmt = N;
2651
2652 // Peek through zext.
2653 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2654 ShAmt = ShAmt.getOperand(0);
2655
2656 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2657 // amount. If there is an AND on the shift amount, we can bypass it if it
2658 // doesn't affect any of those bits.
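  // For example, with ShiftWidth == 64, (and Y, 63) or (and Y, 127) can be
  // bypassed because the shift only reads bits [5:0] of its amount operand.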
2659 if (ShAmt.getOpcode() == ISD::AND &&
2660 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2661 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2662
2663 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2664 // mask that covers the bits needed to represent all shift amounts.
2665 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2666 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2667
2668 if (ShMask.isSubsetOf(AndMask)) {
2669 ShAmt = ShAmt.getOperand(0);
2670 } else {
2671 // SimplifyDemandedBits may have optimized the mask so try restoring any
2672 // bits that are known zero.
2673 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2674 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2675 return true;
2676 ShAmt = ShAmt.getOperand(0);
2677 }
2678 }
2679
2680 if (ShAmt.getOpcode() == ISD::ADD &&
2681 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2682 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2683 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2684 // to avoid the ADD.
2685 if (Imm != 0 && Imm % ShiftWidth == 0) {
2686 ShAmt = ShAmt.getOperand(0);
2687 return true;
2688 }
2689 } else if (ShAmt.getOpcode() == ISD::SUB &&
2690 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2691 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2692 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2693 // generate a NEG instead of a SUB of a constant.
2694 if (Imm != 0 && Imm % ShiftWidth == 0) {
2695 SDLoc DL(ShAmt);
2696 EVT VT = ShAmt.getValueType();
2697 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2698 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2699 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2700 ShAmt.getOperand(1));
2701 ShAmt = SDValue(Neg, 0);
2702 return true;
2703 }
2704 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2705 // to generate a NOT instead of a SUB of a constant.
2706 if (Imm % ShiftWidth == ShiftWidth - 1) {
2707 SDLoc DL(ShAmt);
2708 EVT VT = ShAmt.getValueType();
2709 MachineSDNode *Not =
2710 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2711 CurDAG->getTargetConstant(-1, DL, VT));
2712 ShAmt = SDValue(Not, 0);
2713 return true;
2714 }
2715 }
2716
2717 return true;
2718}
2719
2720/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2721/// check for equality with 0. This function emits instructions that convert the
2722/// seteq/setne into something that can be compared with 0.
2723/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2724/// ISD::SETNE).
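/// For example, (setne a0, 5) is rewritten to "addi tmp, a0, -5"; tmp is zero
/// iff a0 == 5, so the caller can compare tmp against zero.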
2725bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2726                                    SDValue &Val) {
2727 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2728 "Unexpected condition code!");
2729
2730 // We're looking for a setcc.
2731 if (N->getOpcode() != ISD::SETCC)
2732 return false;
2733
2734 // Must be an equality comparison.
2735 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2736 if (CCVal != ExpectedCCVal)
2737 return false;
2738
2739 SDValue LHS = N->getOperand(0);
2740 SDValue RHS = N->getOperand(1);
2741
2742 if (!LHS.getValueType().isScalarInteger())
2743 return false;
2744
2745 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2746 if (isNullConstant(RHS)) {
2747 Val = LHS;
2748 return true;
2749 }
2750
2751 SDLoc DL(N);
2752
2753 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2754 int64_t CVal = C->getSExtValue();
2755 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2756 // non-zero otherwise.
2757 if (CVal == -2048) {
2758 Val =
2759          SDValue(CurDAG->getMachineNode(
2760                      RISCV::XORI, DL, N->getValueType(0), LHS,
2761 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2762 0);
2763 return true;
2764 }
2765 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2766 // LHS is equal to the RHS and non-zero otherwise.
2767 if (isInt<12>(CVal) || CVal == 2048) {
2768 Val =
2769          SDValue(CurDAG->getMachineNode(
2770                      RISCV::ADDI, DL, N->getValueType(0), LHS,
2771 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2772 0);
2773 return true;
2774 }
2775 }
2776
2777 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2778 // equal and a non-zero value if they aren't.
2779 Val = SDValue(
2780 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2781 return true;
2782}
2783
2784bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2785  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2786 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2787 Val = N.getOperand(0);
2788 return true;
2789 }
2790
2791 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2792 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2793 return N;
2794
2795 SDValue N0 = N.getOperand(0);
2796 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2797 N.getConstantOperandVal(1) == ShiftAmt &&
2798 N0.getConstantOperandVal(1) == ShiftAmt)
2799 return N0.getOperand(0);
2800
2801 return N;
2802 };
2803
2804 MVT VT = N.getSimpleValueType();
2805 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2806 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2807 return true;
2808 }
2809
2810 return false;
2811}
2812
2813bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2814  if (N.getOpcode() == ISD::AND) {
2815 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2816 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2817 Val = N.getOperand(0);
2818 return true;
2819 }
2820 }
2821 MVT VT = N.getSimpleValueType();
2822 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2823 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2824 Val = N;
2825 return true;
2826 }
2827
2828 return false;
2829}
2830
2831/// Look for various patterns that can be done with a SHL that can be folded
2832/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2833/// SHXADD we are trying to match.
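/// For example, with ShAmt == 2 on RV64, (and (srl Y, 4), Mask) where Mask has
/// bits [59:2] set is equivalent to ((Y >> 6) << 2), so we can emit
/// "SRLI tmp, Y, 6" and let the SH2ADD supply the remaining shift by 2.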
2834bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2835                                       SDValue &Val) {
2836 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2837 SDValue N0 = N.getOperand(0);
2838
2839 bool LeftShift = N0.getOpcode() == ISD::SHL;
2840 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2841 isa<ConstantSDNode>(N0.getOperand(1))) {
2842 uint64_t Mask = N.getConstantOperandVal(1);
2843 unsigned C2 = N0.getConstantOperandVal(1);
2844
2845 unsigned XLen = Subtarget->getXLen();
2846 if (LeftShift)
2847 Mask &= maskTrailingZeros<uint64_t>(C2);
2848 else
2849 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2850
2851 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2852 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2853 // followed by a SHXADD with c3 for the X amount.
2854 if (isShiftedMask_64(Mask)) {
2855 unsigned Leading = XLen - llvm::bit_width(Mask);
2856 unsigned Trailing = llvm::countr_zero(Mask);
2857 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2858 SDLoc DL(N);
2859 EVT VT = N.getValueType();
2860        Val = SDValue(CurDAG->getMachineNode(
2861                          RISCV::SRLI, DL, VT, N0.getOperand(0),
2862 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2863 0);
2864 return true;
2865 }
2866 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2867 // leading zeros and c3 trailing zeros. We can use an SRLI by C3
2868 // followed by a SHXADD using c3 for the X amount.
2869 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2870 SDLoc DL(N);
2871 EVT VT = N.getValueType();
2872 Val = SDValue(
2873            CurDAG->getMachineNode(
2874                RISCV::SRLI, DL, VT, N0.getOperand(0),
2875 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2876 0);
2877 return true;
2878 }
2879 }
2880 }
2881 }
2882
2883 bool LeftShift = N.getOpcode() == ISD::SHL;
2884 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2885 isa<ConstantSDNode>(N.getOperand(1))) {
2886 SDValue N0 = N.getOperand(0);
2887 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2888 isa<ConstantSDNode>(N0.getOperand(1))) {
2889 uint64_t Mask = N0.getConstantOperandVal(1);
2890 if (isShiftedMask_64(Mask)) {
2891 unsigned C1 = N.getConstantOperandVal(1);
2892 unsigned XLen = Subtarget->getXLen();
2893 unsigned Leading = XLen - llvm::bit_width(Mask);
2894 unsigned Trailing = llvm::countr_zero(Mask);
2895 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2896 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2897 if (LeftShift && Leading == 32 && Trailing > 0 &&
2898 (Trailing + C1) == ShAmt) {
2899 SDLoc DL(N);
2900 EVT VT = N.getValueType();
2901        Val = SDValue(CurDAG->getMachineNode(
2902                          RISCV::SRLIW, DL, VT, N0.getOperand(0),
2903 CurDAG->getTargetConstant(Trailing, DL, VT)),
2904 0);
2905 return true;
2906 }
2907 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2908 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2909 if (!LeftShift && Leading == 32 && Trailing > C1 &&
2910 (Trailing - C1) == ShAmt) {
2911 SDLoc DL(N);
2912 EVT VT = N.getValueType();
2913        Val = SDValue(CurDAG->getMachineNode(
2914                          RISCV::SRLIW, DL, VT, N0.getOperand(0),
2915 CurDAG->getTargetConstant(Trailing, DL, VT)),
2916 0);
2917 return true;
2918 }
2919 }
2920 }
2921 }
2922
2923 return false;
2924}
2925
2926/// Look for various patterns that can be done with a SHL that can be folded
2927/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2928/// SHXADD_UW we are trying to match.
2929bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
2930                                          SDValue &Val) {
2931 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2932 N.hasOneUse()) {
2933 SDValue N0 = N.getOperand(0);
2934 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2935 N0.hasOneUse()) {
2936 uint64_t Mask = N.getConstantOperandVal(1);
2937 unsigned C2 = N0.getConstantOperandVal(1);
2938
2939 Mask &= maskTrailingZeros<uint64_t>(C2);
2940
2941 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2942 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2943 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2944 if (isShiftedMask_64(Mask)) {
2945 unsigned Leading = llvm::countl_zero(Mask);
2946 unsigned Trailing = llvm::countr_zero(Mask);
2947 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2948 SDLoc DL(N);
2949 EVT VT = N.getValueType();
2950        Val = SDValue(CurDAG->getMachineNode(
2951                          RISCV::SLLI, DL, VT, N0.getOperand(0),
2952 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2953 0);
2954 return true;
2955 }
2956 }
2957 }
2958 }
2959
2960 return false;
2961}
2962
2963static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
2964 unsigned Bits,
2965 const TargetInstrInfo *TII) {
2966 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
2967
2968 if (!MCOpcode)
2969 return false;
2970
2971 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
2972 const uint64_t TSFlags = MCID.TSFlags;
2973 if (!RISCVII::hasSEWOp(TSFlags))
2974 return false;
2975 assert(RISCVII::hasVLOp(TSFlags));
2976
2977 bool HasGlueOp = User->getGluedNode() != nullptr;
2978 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
2979 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
2980 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
2981 unsigned VLIdx =
2982 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
2983 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
2984
2985 if (UserOpNo == VLIdx)
2986 return false;
2987
2988 auto NumDemandedBits =
2989 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
2990 return NumDemandedBits && Bits >= *NumDemandedBits;
2991}
2992
2993// Return true if all users of this SDNode* only consume the lower \p Bits.
2994// This can be used to form W instructions for add/sub/mul/shl even when the
2995// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
2996// SimplifyDemandedBits has made it so some users see a sext_inreg and some
2997// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
2998// the add/sub/mul/shl to become non-W instructions. By checking the users we
2999// may be able to use a W instruction and CSE with the other instruction if
3000// this has happened. We could try to detect that the CSE opportunity exists
3001// before doing this, but that would be more complicated.
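// For example, an i64 ADD whose only users are an SW (which reads just the
// low 32 bits of its value operand) and an ADDW can itself be selected as
// ADDW, allowing it to CSE with the existing ADDW.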
3002bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3003                                        const unsigned Depth) const {
3004 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3005 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3006 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3007 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3008 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3009 isa<ConstantSDNode>(Node) || Depth != 0) &&
3010 "Unexpected opcode");
3011
3012  if (Depth >= SelectionDAG::MaxRecursionDepth)
3013    return false;
3014
3015 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3016 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3017 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3018 return false;
3019
3020 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3021 SDNode *User = *UI;
3022 // Users of this node should have already been instruction selected
3023 if (!User->isMachineOpcode())
3024 return false;
3025
3026 // TODO: Add more opcodes?
3027 switch (User->getMachineOpcode()) {
3028 default:
3029 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3030 break;
3031 return false;
3032 case RISCV::ADDW:
3033 case RISCV::ADDIW:
3034 case RISCV::SUBW:
3035 case RISCV::MULW:
3036 case RISCV::SLLW:
3037 case RISCV::SLLIW:
3038 case RISCV::SRAW:
3039 case RISCV::SRAIW:
3040 case RISCV::SRLW:
3041 case RISCV::SRLIW:
3042 case RISCV::DIVW:
3043 case RISCV::DIVUW:
3044 case RISCV::REMW:
3045 case RISCV::REMUW:
3046 case RISCV::ROLW:
3047 case RISCV::RORW:
3048 case RISCV::RORIW:
3049 case RISCV::CLZW:
3050 case RISCV::CTZW:
3051 case RISCV::CPOPW:
3052 case RISCV::SLLI_UW:
3053 case RISCV::FMV_W_X:
3054 case RISCV::FCVT_H_W:
3055 case RISCV::FCVT_H_WU:
3056 case RISCV::FCVT_S_W:
3057 case RISCV::FCVT_S_WU:
3058 case RISCV::FCVT_D_W:
3059 case RISCV::FCVT_D_WU:
3060 case RISCV::TH_REVW:
3061 case RISCV::TH_SRRIW:
3062 if (Bits < 32)
3063 return false;
3064 break;
3065 case RISCV::SLL:
3066 case RISCV::SRA:
3067 case RISCV::SRL:
3068 case RISCV::ROL:
3069 case RISCV::ROR:
3070 case RISCV::BSET:
3071 case RISCV::BCLR:
3072 case RISCV::BINV:
3073 // Shift amount operands only use log2(Xlen) bits.
3074 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
3075 return false;
3076 break;
3077 case RISCV::SLLI:
3078 // SLLI only uses the lower (XLen - ShAmt) bits.
3079 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
3080 return false;
3081 break;
3082 case RISCV::ANDI:
3083 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3084 break;
3085 goto RecCheck;
3086 case RISCV::ORI: {
3087 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3088 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3089 break;
3090 [[fallthrough]];
3091 }
3092 case RISCV::AND:
3093 case RISCV::OR:
3094 case RISCV::XOR:
3095 case RISCV::XORI:
3096 case RISCV::ANDN:
3097 case RISCV::ORN:
3098 case RISCV::XNOR:
3099 case RISCV::SH1ADD:
3100 case RISCV::SH2ADD:
3101 case RISCV::SH3ADD:
3102 RecCheck:
3103 if (hasAllNBitUsers(User, Bits, Depth + 1))
3104 break;
3105 return false;
3106 case RISCV::SRLI: {
3107 unsigned ShAmt = User->getConstantOperandVal(1);
3108 // If we are shifting right by less than Bits, and users don't demand any
3109 // bits that were shifted into [Bits-1:0], then we can consider this as an
3110 // N-Bit user.
3111 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3112 break;
3113 return false;
3114 }
3115 case RISCV::SEXT_B:
3116 case RISCV::PACKH:
3117 if (Bits < 8)
3118 return false;
3119 break;
3120 case RISCV::SEXT_H:
3121 case RISCV::FMV_H_X:
3122 case RISCV::ZEXT_H_RV32:
3123 case RISCV::ZEXT_H_RV64:
3124 case RISCV::PACKW:
3125 if (Bits < 16)
3126 return false;
3127 break;
3128 case RISCV::PACK:
3129 if (Bits < (Subtarget->getXLen() / 2))
3130 return false;
3131 break;
3132 case RISCV::ADD_UW:
3133 case RISCV::SH1ADD_UW:
3134 case RISCV::SH2ADD_UW:
3135 case RISCV::SH3ADD_UW:
3136 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3137 // 32 bits.
3138 if (UI.getOperandNo() != 0 || Bits < 32)
3139 return false;
3140 break;
3141 case RISCV::SB:
3142 if (UI.getOperandNo() != 0 || Bits < 8)
3143 return false;
3144 break;
3145 case RISCV::SH:
3146 if (UI.getOperandNo() != 0 || Bits < 16)
3147 return false;
3148 break;
3149 case RISCV::SW:
3150 if (UI.getOperandNo() != 0 || Bits < 32)
3151 return false;
3152 break;
3153 }
3154 }
3155
3156 return true;
3157}
3158
3159// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3160bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3161                                        SDValue &Shl2) {
3162 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3163 int64_t Offset = C->getSExtValue();
3164 int64_t Shift;
3165 for (Shift = 0; Shift < 4; Shift++)
3166 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3167 break;
3168
3169 // Constant cannot be encoded.
3170 if (Shift == 4)
3171 return false;
3172
3173 EVT Ty = N->getValueType(0);
3174 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3175 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3176 return true;
3177 }
3178
3179 return false;
3180}
3181
3182// Select VL as a 5 bit immediate or a value that will become a register. This
3183// allows us to choose between VSETIVLI or VSETVLI later.
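// For example, a constant VL of 8 fits in 5 bits and can later use VSETIVLI,
// an all-ones VL is treated as VLMAX, and anything else stays as a register
// operand for VSETVLI.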
3184bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3185  auto *C = dyn_cast<ConstantSDNode>(N);
3186 if (C && isUInt<5>(C->getZExtValue())) {
3187 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3188 N->getValueType(0));
3189 } else if (C && C->isAllOnes()) {
3190 // Treat all ones as VLMax.
3191    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3192                                   N->getValueType(0));
3193 } else if (isa<RegisterSDNode>(N) &&
3194 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3195 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3196 // as the register class. Convert X0 to a special immediate to pass the
3197 // MachineVerifier. This is recognized specially by the vsetvli insertion
3198 // pass.
3199    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3200                                   N->getValueType(0));
3201 } else {
3202 VL = N;
3203 }
3204
3205 return true;
3206}
3207
3208static SDValue findVSplat(SDValue N) {
3209  if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3210 if (!N.getOperand(0).isUndef())
3211 return SDValue();
3212 N = N.getOperand(1);
3213 }
3214 SDValue Splat = N;
3215 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3216 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3217 !Splat.getOperand(0).isUndef())
3218 return SDValue();
3219 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3220 return Splat;
3221}
3222
3223bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3224  SDValue Splat = findVSplat(N);
3225  if (!Splat)
3226 return false;
3227
3228 SplatVal = Splat.getOperand(1);
3229 return true;
3230}
3231
3232static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3233                                  SelectionDAG &DAG,
3234 const RISCVSubtarget &Subtarget,
3235 std::function<bool(int64_t)> ValidateImm) {
3236  SDValue Splat = findVSplat(N);
3237  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3238 return false;
3239
3240 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3241 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3242 "Unexpected splat operand type");
3243
3244 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3245 // type is wider than the resulting vector element type: an implicit
3246 // truncation first takes place. Therefore, perform a manual
3247 // truncation/sign-extension in order to ignore any truncated bits and catch
3248 // any zero-extended immediate.
3249 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3250 // sign-extending to (XLenVT -1).
3251 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3252
3253 int64_t SplatImm = SplatConst.getSExtValue();
3254
3255 if (!ValidateImm(SplatImm))
3256 return false;
3257
3258 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3259 return true;
3260}
3261
3262bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3263  return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3264 [](int64_t Imm) { return isInt<5>(Imm); });
3265}
3266
3267bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3268  return selectVSplatImmHelper(
3269 N, SplatVal, *CurDAG, *Subtarget,
3270 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3271}
3272
3273bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3274                                                      SDValue &SplatVal) {
3275 return selectVSplatImmHelper(
3276 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3277 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3278 });
3279}
3280
3281bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3282                                         SDValue &SplatVal) {
3283 return selectVSplatImmHelper(
3284 N, SplatVal, *CurDAG, *Subtarget,
3285 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3286}
3287
3288bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3289  auto IsExtOrTrunc = [](SDValue N) {
3290 switch (N->getOpcode()) {
3291 case ISD::SIGN_EXTEND:
3292 case ISD::ZERO_EXTEND:
3293 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3294 // inactive elements will be undef.
3295    case RISCVISD::TRUNCATE_VECTOR_VL:
3296    case RISCVISD::VSEXT_VL:
3297 case RISCVISD::VZEXT_VL:
3298 return true;
3299 default:
3300 return false;
3301 }
3302 };
3303
3304 // We can have multiple nested nodes, so unravel them all if needed.
3305 while (IsExtOrTrunc(N)) {
3306 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3307 return false;
3308 N = N->getOperand(0);
3309 }
3310
3311 return selectVSplat(N, SplatVal);
3312}
3313
3314bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3315  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3316 if (!CFP)
3317 return false;
3318 const APFloat &APF = CFP->getValueAPF();
3319 // td can handle +0.0 already.
3320 if (APF.isPosZero())
3321 return false;
3322
3323 MVT VT = CFP->getSimpleValueType(0);
3324
3325 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3326 // the returned pair is true) we still prefer FLI + FNEG over immediate
3327 // materialization as the latter might generate a longer instruction sequence.
3328 if (static_cast<const RISCVTargetLowering *>(TLI)
3329 ->getLegalZfaFPImm(APF, VT)
3330 .first >= 0)
3331 return false;
3332
3333 MVT XLenVT = Subtarget->getXLenVT();
3334 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3335 assert(APF.isNegZero() && "Unexpected constant.");
3336 return false;
3337 }
3338 SDLoc DL(N);
3339 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3340 *Subtarget);
3341 return true;
3342}
3343
3344bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3345                                       SDValue &Imm) {
3346 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3347 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3348
3349 if (!isInt<5>(ImmVal))
3350 return false;
3351
3352 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3353 return true;
3354 }
3355
3356 return false;
3357}
3358
3359// Try to remove sext.w if the input is a W instruction or can be made into
3360// a W instruction cheaply.
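// For example, "addiw rd, rs, 0" fed by "add rs, a, b" becomes "addw rd, a, b",
// and if rs already comes from a W instruction the addiw is simply removed.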
3361bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3362 // Look for the sext.w pattern, addiw rd, rs1, 0.
3363 if (N->getMachineOpcode() != RISCV::ADDIW ||
3364 !isNullConstant(N->getOperand(1)))
3365 return false;
3366
3367 SDValue N0 = N->getOperand(0);
3368 if (!N0.isMachineOpcode())
3369 return false;
3370
3371 switch (N0.getMachineOpcode()) {
3372 default:
3373 break;
3374 case RISCV::ADD:
3375 case RISCV::ADDI:
3376 case RISCV::SUB:
3377 case RISCV::MUL:
3378 case RISCV::SLLI: {
3379 // Convert sext.w+add/sub/mul to their W instructions. This will create
3380 // a new independent instruction. This improves latency.
3381 unsigned Opc;
3382 switch (N0.getMachineOpcode()) {
3383 default:
3384 llvm_unreachable("Unexpected opcode!");
3385 case RISCV::ADD: Opc = RISCV::ADDW; break;
3386 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3387 case RISCV::SUB: Opc = RISCV::SUBW; break;
3388 case RISCV::MUL: Opc = RISCV::MULW; break;
3389 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3390 }
3391
3392 SDValue N00 = N0.getOperand(0);
3393 SDValue N01 = N0.getOperand(1);
3394
3395 // Shift amount needs to be uimm5.
3396 if (N0.getMachineOpcode() == RISCV::SLLI &&
3397 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3398 break;
3399
3400 SDNode *Result =
3401 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3402 N00, N01);
3403 ReplaceUses(N, Result);
3404 return true;
3405 }
3406 case RISCV::ADDW:
3407 case RISCV::ADDIW:
3408 case RISCV::SUBW:
3409 case RISCV::MULW:
3410 case RISCV::SLLIW:
3411 case RISCV::PACKW:
3412 case RISCV::TH_MULAW:
3413 case RISCV::TH_MULAH:
3414 case RISCV::TH_MULSW:
3415 case RISCV::TH_MULSH:
3416 if (N0.getValueType() == MVT::i32)
3417 break;
3418
3419 // The result is already sign-extended; just remove the sext.w.
3420 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3421 ReplaceUses(N, N0.getNode());
3422 return true;
3423 }
3424
3425 return false;
3426}
3427
3428static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3429 // Check that we're using V0 as a mask register.
3430 if (!isa<RegisterSDNode>(MaskOp) ||
3431 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3432 return false;
3433
3434 // The glued user defines V0.
3435 const auto *Glued = GlueOp.getNode();
3436
3437 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3438 return false;
3439
3440 // Check that we're defining V0 as a mask register.
3441 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3442 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3443 return false;
3444
3445 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3446 SDValue MaskSetter = Glued->getOperand(2);
3447
3448 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3449 // from an extract_subvector or insert_subvector.
3450 if (MaskSetter->isMachineOpcode() &&
3451 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3452 MaskSetter = MaskSetter->getOperand(0);
3453
3454 const auto IsVMSet = [](unsigned Opc) {
3455 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3456 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3457 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3458 Opc == RISCV::PseudoVMSET_M_B8;
3459 };
3460
3461 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3462 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3463 // assume that it's all-ones? Same applies to its VL.
3464 return MaskSetter->isMachineOpcode() &&
3465 IsVMSet(MaskSetter.getMachineOpcode());
3466}
3467
3468 // Return true if we can prove that the mask operand of N is an all-ones mask.
3469static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3470 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3471 N->getOperand(N->getNumOperands() - 1));
3472}
3473
3474static bool isImplicitDef(SDValue V) {
3475 return V.isMachineOpcode() &&
3476 V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3477}
3478
3479// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3480// corresponding "unmasked" pseudo versions. The mask we're interested in will
3481// take the form of a V0 physical register operand, with a glued
3482// register-setting instruction.
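// Illustrative sketch (editorial, operand lists abbreviated):
//
//   %allones = PseudoVMSET_M_B8 %vl, %sew
//   $v0 = CopyToReg %allones
//   %x = PseudoVADD_VV_MASK %passthru, %a, %b, $v0, %vl, %sew, %policy
// ->
//   %x = PseudoVADD_VV %passthru, %a, %b, %vl, %sew, %policy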
3483bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3484 const RISCV::RISCVMaskedPseudoInfo *I =
3485 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3486 if (!I)
3487 return false;
3488
3489 unsigned MaskOpIdx = I->MaskOpIdx;
3490 if (!usesAllOnesMask(N, MaskOpIdx))
3491 return false;
3492
3493 // There are two classes of pseudos in the table - compares and
3494 // everything else. See the comment on RISCVMaskedPseudo for details.
3495 const unsigned Opc = I->UnmaskedPseudo;
3496 const MCInstrDesc &MCID = TII->get(Opc);
3497 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3498#ifndef NDEBUG
3499 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3502 "Masked and unmasked pseudos are inconsistent");
3503 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3504 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3505#endif
3506
3507 SmallVector<SDValue, 8> Ops;
3508 // Skip the merge operand at index 0 if !UseTUPseudo.
3509 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3510 // Skip the mask, and the Glue.
3511 SDValue Op = N->getOperand(I);
3512 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3513 continue;
3514 Ops.push_back(Op);
3515 }
3516
3517 // Transitively apply any node glued to our new node.
3518 const auto *Glued = N->getGluedNode();
3519 if (auto *TGlued = Glued->getGluedNode())
3520 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3521
3522 MachineSDNode *Result =
3523 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3524
3525 if (!N->memoperands_empty())
3526 CurDAG->setNodeMemRefs(Result, N->memoperands());
3527
3528 Result->setFlags(N->getFlags());
3529 ReplaceUses(N, Result);
3530
3531 return true;
3532}
3533
3534static bool IsVMerge(SDNode *N) {
3535 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3536}
3537
3538static bool IsVMv(SDNode *N) {
3539 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3540}
3541
3542static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3543 switch (LMUL) {
3544 case RISCVII::LMUL_F8:
3545 return RISCV::PseudoVMSET_M_B1;
3546 case RISCVII::LMUL_F4:
3547 return RISCV::PseudoVMSET_M_B2;
3548 case RISCVII::LMUL_F2:
3549 return RISCV::PseudoVMSET_M_B4;
3550 case RISCVII::LMUL_1:
3551 return RISCV::PseudoVMSET_M_B8;
3552 case RISCVII::LMUL_2:
3553 return RISCV::PseudoVMSET_M_B16;
3554 case RISCVII::LMUL_4:
3555 return RISCV::PseudoVMSET_M_B32;
3556 case RISCVII::LMUL_8:
3557 return RISCV::PseudoVMSET_M_B64;
3558 case RISCVII::LMUL_RESERVED:
3559 llvm_unreachable("Unexpected LMUL");
3560 }
3561 llvm_unreachable("Unknown VLMUL enum");
3562}
3563
3564// Try to fold away VMERGE_VVM instructions into their true operands:
3565//
3566// %true = PseudoVADD_VV ...
3567// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3568// ->
3569// %x = PseudoVADD_VV_MASK %false, ..., %mask
3570//
3571// We can only fold if vmerge's merge operand, vmerge's false operand and
3572// %true's merge operand (if it has one) are the same. This is because we have
3573// to consolidate them into one merge operand in the result.
3574//
3575// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3576// mask is all ones.
3577//
3578// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3579// VMERGE_VVM with an all ones mask.
3580//
3581// The resulting VL is the minimum of the two VLs.
3582//
3583// The resulting policy is the effective policy the vmerge would have had,
3584 // i.e. whether or not its merge operand was implicit-def.
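//
// A further illustrative case (editorial), this time for the vmv.v.v fold:
//
//   %true = PseudoVADD_VV %passthru, %a, %b, %avl, %sew
//   %x = PseudoVMV_V_V %passthru, %true, %vl
// ->
//   %x = PseudoVADD_VV_MASK %passthru, %a, %b, (all-ones mask),
//                           min(%avl, %vl), %sew, policy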
3585bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3586 SDValue Merge, False, True, VL, Mask, Glue;
3587 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3588 if (IsVMv(N)) {
3589 Merge = N->getOperand(0);
3590 False = N->getOperand(0);
3591 True = N->getOperand(1);
3592 VL = N->getOperand(2);
3593 // A vmv.v.v won't have a Mask or Glue; instead we'll construct an all-ones
3594 // mask below.
3595 } else {
3596 assert(IsVMerge(N));
3597 Merge = N->getOperand(0);
3598 False = N->getOperand(1);
3599 True = N->getOperand(2);
3600 Mask = N->getOperand(3);
3601 VL = N->getOperand(4);
3602 // We always have a glue node for the mask at v0.
3603 Glue = N->getOperand(N->getNumOperands() - 1);
3604 }
3605 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3606 assert(!Glue || Glue.getValueType() == MVT::Glue);
3607
3608 // We require that merge and false are the same, or that merge is
3609 // undefined.
3610 if (Merge != False && !isImplicitDef(Merge))
3611 return false;
3612
3613 assert(True.getResNo() == 0 &&
3614 "Expect True is the first output of an instruction.");
3615
3616 // N must be the only user of True.
3617 if (!True.hasOneUse())
3618 return false;
3619
3620 if (!True.isMachineOpcode())
3621 return false;
3622
3623 unsigned TrueOpc = True.getMachineOpcode();
3624 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3625 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3626 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3627
3628 bool IsMasked = false;
3629 const RISCV::RISCVMaskedPseudoInfo *Info =
3630 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3631 if (!Info && HasTiedDest) {
3632 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3633 IsMasked = true;
3634 }
3635
3636 if (!Info)
3637 return false;
3638
3639 // When Mask is not a true mask, this transformation is illegal for some
3640 // operations whose results are affected by mask, like viota.m.
3641 if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
3642 return false;
3643
3644 // If True has a merge operand then it needs to be the same as vmerge's False,
3645 // since False will be used for the result's merge operand.
3646 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3647 // The vmerge instruction must be TU.
3648 // FIXME: This could be relaxed, but we need to handle the policy for the
3649 // resulting op correctly.
3650 if (isImplicitDef(Merge))
3651 return false;
3652 SDValue MergeOpTrue = True->getOperand(0);
3653 if (False != MergeOpTrue)
3654 return false;
3655 }
3656
3657 // If True is masked then the vmerge must have an all 1s mask, since we're
3658 // going to keep the mask from True.
3659 if (IsMasked) {
3660 assert(HasTiedDest && "Expected tied dest");
3661 // The vmerge instruction must be TU.
3662 if (isImplicitDef(Merge))
3663 return false;
3664 // FIXME: Support mask agnostic True instruction which would have an
3665 // undef merge operand.
3666 if (Mask && !usesAllOnesMask(Mask, Glue))
3667 return false;
3668 }
3669
3670 // Skip if True has side effects.
3671 // TODO: Support vleff and vlsegff.
3672 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3673 return false;
3674
3675 // The last operand of a masked instruction may be glued.
3676 bool HasGlueOp = True->getGluedNode() != nullptr;
3677
3678 // The chain operand may exist either before the glued operands or in the last
3679 // position.
3680 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3681 bool HasChainOp =
3682 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3683
3684 if (HasChainOp) {
3685 // Avoid creating cycles in the DAG. We must ensure that none of the other
3686 // operands depend on True through its chain.
3687 SmallVector<const SDNode *, 4> LoopWorklist;
3688 SmallPtrSet<const SDNode *, 16> Visited;
3689 LoopWorklist.push_back(False.getNode());
3690 if (Mask)
3691 LoopWorklist.push_back(Mask.getNode());
3692 LoopWorklist.push_back(VL.getNode());
3693 if (Glue)
3694 LoopWorklist.push_back(Glue.getNode());
3695 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3696 return false;
3697 }
3698
3699 // The vector policy operand may be present for masked intrinsics
3700 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3701 unsigned TrueVLIndex =
3702 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3703 SDValue TrueVL = True.getOperand(TrueVLIndex);
3704 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3705
3706 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3707 if (LHS == RHS)
3708 return LHS;
3709 if (isAllOnesConstant(LHS))
3710 return RHS;
3711 if (isAllOnesConstant(RHS))
3712 return LHS;
3713 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3714 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3715 if (!CLHS || !CRHS)
3716 return SDValue();
3717 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3718 };
3719
3720 // Because N and True must have the same merge operand (or True's operand is
3721 // implicit_def), the "effective" body is the minimum of their VLs.
3722 SDValue OrigVL = VL;
3723 VL = GetMinVL(TrueVL, VL);
3724 if (!VL)
3725 return false;
3726
3727 // If we end up changing the VL or mask of True, then we need to make sure it
3728 // doesn't raise any observable fp exceptions, since changing the active
3729 // elements will affect how fflags is set.
3730 if (TrueVL != VL || !IsMasked)
3731 if (mayRaiseFPException(True.getNode()) &&
3732 !True->getFlags().hasNoFPExcept())
3733 return false;
3734
3735 SDLoc DL(N);
3736
3737 // From the preconditions we checked above, we know the mask and thus glue
3738 // for the result node will be taken from True.
3739 if (IsMasked) {
3740 Mask = True->getOperand(Info->MaskOpIdx);
3741 Glue = True->getOperand(True->getNumOperands() - 1);
3742 assert(Glue.getValueType() == MVT::Glue);
3743 }
3744 // Otherwise we end up using the vmerge's mask; if the vmerge is actually a
3745 // vmv.v.v (which has no mask operand), create an all-ones mask to use.
3746 else if (IsVMv(N)) {
3747 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3748 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3749 ElementCount EC = N->getValueType(0).getVectorElementCount();
3750 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3751
3752 SDValue AllOnesMask =
3753 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3754 SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3755 RISCV::V0, AllOnesMask, SDValue());
3756 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3757 Glue = MaskCopy.getValue(1);
3758 }
3759
3760 unsigned MaskedOpc = Info->MaskedPseudo;
3761#ifndef NDEBUG
3762 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3764 "Expected instructions with mask have policy operand.");
3765 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3766 MCOI::TIED_TO) == 0 &&
3767 "Expected instructions with mask have a tied dest.");
3768#endif
3769
3770 // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3771 // operand is undefined.
3772 //
3773 // However, if the VL became smaller than what the vmerge had originally, then
3774 // elements past VL that were previously in the vmerge's body will have moved
3775 // to the tail. In that case we always need to use tail undisturbed to
3776 // preserve them.
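 // For example, if the vmerge originally ran with VL=8 but True's VL is 4,
 // lanes 4..7 were in the vmerge's body but end up in the tail of the combined
 // operation, so a tail-agnostic policy could clobber them.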
3777 bool MergeVLShrunk = VL != OrigVL;
3778 uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3779 ? RISCVII::TAIL_AGNOSTIC
3780 : /*TUMU*/ 0;
3781 SDValue PolicyOp =
3782 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3783
3784
3785 SmallVector<SDValue, 8> Ops;
3786 Ops.push_back(False);
3787
3788 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3789 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3790 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3791 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3792
3793 Ops.push_back(Mask);
3794
3795 // For an unmasked "VOp" with a rounding-mode operand, the operand list is
3796 // (..., rm, vl) or (..., rm, vl, policy); its masked version is
3797 // (..., vm, rm, vl, policy).
3798 // See the rounding-mode pseudo nodes in RISCVInstrInfoVPseudos.td.
3799 if (HasRoundingMode)
3800 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3801
3802 Ops.append({VL, SEW, PolicyOp});
3803
3804 // Result node should have chain operand of True.
3805 if (HasChainOp)
3806 Ops.push_back(True.getOperand(TrueChainOpIdx));
3807
3808 // Add the glue for the CopyToReg of mask->v0.
3809 Ops.push_back(Glue);
3810
3811 MachineSDNode *Result =
3812 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3813 Result->setFlags(True->getFlags());
3814
3815 if (!cast<MachineSDNode>(True)->memoperands_empty())
3816 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3817
3818 // Replace vmerge.vvm node by Result.
3819 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3820
3821 // Replace the other values of True, e.g. its chain and VL outputs.
3822 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3823 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3824
3825 return true;
3826}
3827
3828bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3829 bool MadeChange = false;
3830 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3831
3832 while (Position != CurDAG->allnodes_begin()) {
3833 SDNode *N = &*--Position;
3834 if (N->use_empty() || !N->isMachineOpcode())
3835 continue;
3836
3837 if (IsVMerge(N) || IsVMv(N))
3838 MadeChange |= performCombineVMergeAndVOps(N);
3839 }
3840 return MadeChange;
3841}
3842
3843 /// If our passthru is an implicit_def, use noreg instead. This sidesteps
3844 /// issues with MachineCSE not being able to CSE expressions with
3845/// IMPLICIT_DEF operands while preserving the semantic intent. See
3846/// pr64282 for context. Note that this transform is the last one
3847 /// performed during ISel DAG-to-DAG selection.
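// Illustrative sketch (editorial, operand lists abbreviated):
//
//   %passthru = IMPLICIT_DEF
//   %x = PseudoVADD_VV %passthru, %a, %b, %vl, %sew, %policy
// ->
//   %x = PseudoVADD_VV $noreg, %a, %b, %vl, %sew, %policy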
3848bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3849 bool MadeChange = false;
3850 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3851
3852 while (Position != CurDAG->allnodes_begin()) {
3853 SDNode *N = &*--Position;
3854 if (N->use_empty() || !N->isMachineOpcode())
3855 continue;
3856
3857 const unsigned Opc = N->getMachineOpcode();
3858 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3859 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3860 !isImplicitDef(N->getOperand(0)))
3861 continue;
3862
3863 SmallVector<SDValue, 8> Ops;
3864 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3865 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3866 SDValue Op = N->getOperand(I);
3867 Ops.push_back(Op);
3868 }
3869
3870 MachineSDNode *Result =
3871 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3872 Result->setFlags(N->getFlags());
3873 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3874 ReplaceUses(N, Result);
3875 MadeChange = true;
3876 }
3877 return MadeChange;
3878}
3879
3880
3881// This pass converts a legalized DAG into a RISCV-specific DAG, ready
3882// for instruction scheduling.
3883FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3884 CodeGenOptLevel OptLevel) {
3885 return new RISCVDAGToDAGISel(TM, OptLevel);
3886}
3887
3888char RISCVDAGToDAGISel::ID = 0;
3889
3890INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)