RISCVISelDAGToDAG.cpp
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
21#include "llvm/IR/IntrinsicsRISCV.h"
23#include "llvm/Support/Debug.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "riscv-isel"
30#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
31
32 static cl::opt<bool> UsePseudoMovImm(
33 "riscv-use-rematerializable-movimm", cl::Hidden,
34 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
35 "constant materialization"),
36 cl::init(false));
37
38namespace llvm::RISCV {
39#define GET_RISCVVSSEGTable_IMPL
40#define GET_RISCVVLSEGTable_IMPL
41#define GET_RISCVVLXSEGTable_IMPL
42#define GET_RISCVVSXSEGTable_IMPL
43#define GET_RISCVVLETable_IMPL
44#define GET_RISCVVSETable_IMPL
45#define GET_RISCVVLXTable_IMPL
46#define GET_RISCVVSXTable_IMPL
47#include "RISCVGenSearchableTables.inc"
48} // namespace llvm::RISCV
49
50 void RISCVDAGToDAGISel::PreprocessISelDAG() {
51 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
52
53 bool MadeChange = false;
54 while (Position != CurDAG->allnodes_begin()) {
55 SDNode *N = &*--Position;
56 if (N->use_empty())
57 continue;
58
59 SDValue Result;
60 switch (N->getOpcode()) {
61 case ISD::SPLAT_VECTOR: {
62 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
63 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
64 MVT VT = N->getSimpleValueType(0);
65 unsigned Opc =
66 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
67 SDLoc DL(N);
68 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
69 SDValue Src = N->getOperand(0);
70 if (VT.isInteger())
71 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
72 N->getOperand(0));
73 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
74 break;
75 }
76 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
77 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
78 // load. Done after lowering and combining so that we have a chance to
79 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
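// Editorial sketch (not part of the original source): for a splat of the
// 64-bit value {Hi:Lo} on RV32, the two 32-bit halves are stored to an
// 8-byte stack slot at offsets 0 and 4, and a vlse with an X0 (zero) stride
// then broadcasts the reassembled i64 element into every lane.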
80 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
81 MVT VT = N->getSimpleValueType(0);
82 SDValue Passthru = N->getOperand(0);
83 SDValue Lo = N->getOperand(1);
84 SDValue Hi = N->getOperand(2);
85 SDValue VL = N->getOperand(3);
86 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
87 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
88 "Unexpected VTs!");
89 MachineFunction &MF = CurDAG->getMachineFunction();
90 SDLoc DL(N);
91
92 // Create temporary stack for each expanding node.
93 SDValue StackSlot =
94 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
95 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
96 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
97
98 SDValue Chain = CurDAG->getEntryNode();
99 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
100
101 SDValue OffsetSlot =
102 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
103 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
104 Align(8));
105
106 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
107
108 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
109 SDValue IntID =
110 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
111 SDValue Ops[] = {Chain,
112 IntID,
113 Passthru,
114 StackSlot,
115 CurDAG->getRegister(RISCV::X0, MVT::i64),
116 VL};
117
118 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
119 MVT::i64, MPI, Align(8),
120 MachineMemOperand::MOLoad);
121 break;
122 }
123 }
124
125 if (Result) {
126 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
127 LLVM_DEBUG(N->dump(CurDAG));
128 LLVM_DEBUG(dbgs() << "\nNew: ");
129 LLVM_DEBUG(Result->dump(CurDAG));
130 LLVM_DEBUG(dbgs() << "\n");
131
132 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
133 MadeChange = true;
134 }
135 }
136
137 if (MadeChange)
138 CurDAG->RemoveDeadNodes();
139 }
140
141 void RISCVDAGToDAGISel::PostprocessISelDAG() {
142 HandleSDNode Dummy(CurDAG->getRoot());
143 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
144
145 bool MadeChange = false;
146 while (Position != CurDAG->allnodes_begin()) {
147 SDNode *N = &*--Position;
148 // Skip dead nodes and any non-machine opcodes.
149 if (N->use_empty() || !N->isMachineOpcode())
150 continue;
151
152 MadeChange |= doPeepholeSExtW(N);
153
154 // FIXME: This is here only because the VMerge transform doesn't
155 // know how to handle masked true inputs. Once that has been moved
156 // to post-ISEL, this can be deleted as well.
157 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
158 }
159
160 CurDAG->setRoot(Dummy.getValue());
161
162 MadeChange |= doPeepholeMergeVVMFold();
163
164 // After we're done with everything else, convert IMPLICIT_DEF
165 // passthru operands to NoRegister. This is required to work around
166 // an optimization deficiency in MachineCSE. This really should
167 // be merged back into each of the patterns (i.e. there's no good
168 // reason not to go directly to NoReg), but is being done this way
169 // to allow easy backporting.
170 MadeChange |= doPeepholeNoRegPassThru();
171
172 if (MadeChange)
173 CurDAG->RemoveDeadNodes();
174 }
175
176static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
177 RISCVMatInt::InstSeq &Seq) {
178 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
179 for (const RISCVMatInt::Inst &Inst : Seq) {
180 SDValue SDImm =
181 CurDAG->getSignedConstant(Inst.getImm(), DL, VT, /*isTarget=*/true);
182 SDNode *Result = nullptr;
183 switch (Inst.getOpndKind()) {
184 case RISCVMatInt::Imm:
185 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
186 break;
187 case RISCVMatInt::RegX0:
188 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
189 CurDAG->getRegister(RISCV::X0, VT));
190 break;
191 case RISCVMatInt::RegReg:
192 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
193 break;
194 case RISCVMatInt::RegImm:
195 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
196 break;
197 }
198
199 // Only the first instruction has X0 as its source.
200 SrcReg = SDValue(Result, 0);
201 }
202
203 return SrcReg;
204}
205
206static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
207 int64_t Imm, const RISCVSubtarget &Subtarget) {
208 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
209
210 // Use a rematerializable pseudo instruction for short sequences if enabled.
211 if (Seq.size() == 2 && UsePseudoMovImm)
212 return SDValue(CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
213 CurDAG->getSignedConstant(
214 Imm, DL, VT, /*isTarget=*/true)),
215 0);
216
217 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
218 // worst an LUI+ADDIW. This will require an extra register, but avoids a
219 // constant pool.
220 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
221 // low and high 32 bits are the same and bits 31 and 63 are set.
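// Illustrative example (editorial, not from the source): 0x1234567812345678
// can be built as X = LUI+ADDI(W) of 0x12345678, then ADD(X, SLLI(X, 32)),
// roughly halving the single-register LUI/ADDI/SLLI/ADDI/... sequence at the
// cost of the extra temporary register.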
222 if (Seq.size() > 3) {
223 unsigned ShiftAmt, AddOpc;
224 RISCVMatInt::InstSeq SeqLo =
225 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
226 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
227 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
228
229 SDValue SLLI = SDValue(
230 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
231 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
232 0);
233 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
234 }
235 }
236
237 // Otherwise, use the original sequence.
238 return selectImmSeq(CurDAG, DL, VT, Seq);
239}
240
241 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
242 unsigned NF, RISCVII::VLMUL LMUL) {
243 static const unsigned M1TupleRegClassIDs[] = {
244 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
245 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
246 RISCV::VRN8M1RegClassID};
247 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
248 RISCV::VRN3M2RegClassID,
249 RISCV::VRN4M2RegClassID};
250
251 assert(Regs.size() >= 2 && Regs.size() <= 8);
252
253 unsigned RegClassID;
254 unsigned SubReg0;
255 switch (LMUL) {
256 default:
257 llvm_unreachable("Invalid LMUL.");
258 case RISCVII::VLMUL::LMUL_F8:
259 case RISCVII::VLMUL::LMUL_F4:
260 case RISCVII::VLMUL::LMUL_F2:
261 case RISCVII::VLMUL::LMUL_1:
262 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
263 "Unexpected subreg numbering");
264 SubReg0 = RISCV::sub_vrm1_0;
265 RegClassID = M1TupleRegClassIDs[NF - 2];
266 break;
267 case RISCVII::VLMUL::LMUL_2:
268 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
269 "Unexpected subreg numbering");
270 SubReg0 = RISCV::sub_vrm2_0;
271 RegClassID = M2TupleRegClassIDs[NF - 2];
272 break;
273 case RISCVII::VLMUL::LMUL_4:
274 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
275 "Unexpected subreg numbering");
276 SubReg0 = RISCV::sub_vrm4_0;
277 RegClassID = RISCV::VRN2M4RegClassID;
278 break;
279 }
280
281 SDLoc DL(Regs[0]);
282 SmallVector<SDValue, 8> Ops;
283
284 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
285
286 for (unsigned I = 0; I < Regs.size(); ++I) {
287 Ops.push_back(Regs[I]);
288 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
289 }
290 SDNode *N =
291 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
292 return SDValue(N, 0);
293}
294
295 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
296 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
297 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
298 bool IsLoad, MVT *IndexVT) {
299 SDValue Chain = Node->getOperand(0);
300 SDValue Glue;
301
302 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
303
304 if (IsStridedOrIndexed) {
305 Operands.push_back(Node->getOperand(CurOp++)); // Index.
306 if (IndexVT)
307 *IndexVT = Operands.back()->getSimpleValueType(0);
308 }
309
310 if (IsMasked) {
311 // Mask needs to be copied to V0.
312 SDValue Mask = Node->getOperand(CurOp++);
313 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
314 Glue = Chain.getValue(1);
315 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
316 }
317 SDValue VL;
318 selectVLOp(Node->getOperand(CurOp++), VL);
319 Operands.push_back(VL);
320
321 MVT XLenVT = Subtarget->getXLenVT();
322 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
323 Operands.push_back(SEWOp);
324
325 // At the IR layer, all the masked load intrinsics have policy operands,
326 // none of the others do. All have passthru operands. For our pseudos,
327 // all loads have policy operands.
328 if (IsLoad) {
329 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
330 if (IsMasked)
331 Policy = Node->getConstantOperandVal(CurOp++);
332 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
333 Operands.push_back(PolicyOp);
334 }
335
336 Operands.push_back(Chain); // Chain.
337 if (Glue)
338 Operands.push_back(Glue);
339}
340
341void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
342 bool IsStrided) {
343 SDLoc DL(Node);
344 unsigned NF = Node->getNumValues() - 1;
345 MVT VT = Node->getSimpleValueType(0);
346 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
347 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
348
349 unsigned CurOp = 2;
350 SmallVector<SDValue, 8> Operands;
351
352 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
353 Node->op_begin() + CurOp + NF);
354 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
355 Operands.push_back(Merge);
356 CurOp += NF;
357
358 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
359 Operands, /*IsLoad=*/true);
360
361 const RISCV::VLSEGPseudo *P =
362 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
363 static_cast<unsigned>(LMUL));
364 MachineSDNode *Load =
365 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
366
367 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
368 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
369
370 SDValue SuperReg = SDValue(Load, 0);
371 for (unsigned I = 0; I < NF; ++I) {
372 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
373 ReplaceUses(SDValue(Node, I),
374 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
375 }
376
377 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
378 CurDAG->RemoveDeadNode(Node);
379}
380
381void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
382 SDLoc DL(Node);
383 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
384 MVT VT = Node->getSimpleValueType(0);
385 MVT XLenVT = Subtarget->getXLenVT();
386 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
387 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
388
389 unsigned CurOp = 2;
390 SmallVector<SDValue, 8> Operands;
391
392 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
393 Node->op_begin() + CurOp + NF);
394 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
395 Operands.push_back(MaskedOff);
396 CurOp += NF;
397
398 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
399 /*IsStridedOrIndexed*/ false, Operands,
400 /*IsLoad=*/true);
401
402 const RISCV::VLSEGPseudo *P =
403 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
404 Log2SEW, static_cast<unsigned>(LMUL));
405 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
406 XLenVT, MVT::Other, Operands);
407
408 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
409 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
410
411 SDValue SuperReg = SDValue(Load, 0);
412 for (unsigned I = 0; I < NF; ++I) {
413 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
414 ReplaceUses(SDValue(Node, I),
415 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
416 }
417
418 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
419 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
420 CurDAG->RemoveDeadNode(Node);
421}
422
423void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
424 bool IsOrdered) {
425 SDLoc DL(Node);
426 unsigned NF = Node->getNumValues() - 1;
427 MVT VT = Node->getSimpleValueType(0);
428 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
429 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
430
431 unsigned CurOp = 2;
432 SmallVector<SDValue, 8> Operands;
433
434 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
435 Node->op_begin() + CurOp + NF);
436 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
437 Operands.push_back(MaskedOff);
438 CurOp += NF;
439
440 MVT IndexVT;
441 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
442 /*IsStridedOrIndexed*/ true, Operands,
443 /*IsLoad=*/true, &IndexVT);
444
446 "Element count mismatch");
447
448 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
449 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
450 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
451 report_fatal_error("The V extension does not support EEW=64 for index "
452 "values when XLEN=32");
453 }
454 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
455 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
456 static_cast<unsigned>(IndexLMUL));
457 MachineSDNode *Load =
458 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
459
460 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
461 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
462
463 SDValue SuperReg = SDValue(Load, 0);
464 for (unsigned I = 0; I < NF; ++I) {
465 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
466 ReplaceUses(SDValue(Node, I),
467 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
468 }
469
470 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
471 CurDAG->RemoveDeadNode(Node);
472}
473
474void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
475 bool IsStrided) {
476 SDLoc DL(Node);
477 unsigned NF = Node->getNumOperands() - 4;
478 if (IsStrided)
479 NF--;
480 if (IsMasked)
481 NF--;
482 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
483 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
484 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
485 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
486 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
487
488 SmallVector<SDValue, 8> Operands;
489 Operands.push_back(StoreVal);
490 unsigned CurOp = 2 + NF;
491
492 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
493 Operands);
494
495 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
496 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
497 MachineSDNode *Store =
498 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
499
500 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
501 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
502
503 ReplaceNode(Node, Store);
504}
505
506void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
507 bool IsOrdered) {
508 SDLoc DL(Node);
509 unsigned NF = Node->getNumOperands() - 5;
510 if (IsMasked)
511 --NF;
512 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
513 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
514 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
515 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
516 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
517
518 SmallVector<SDValue, 8> Operands;
519 Operands.push_back(StoreVal);
520 unsigned CurOp = 2 + NF;
521
522 MVT IndexVT;
523 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
524 /*IsStridedOrIndexed*/ true, Operands,
525 /*IsLoad=*/false, &IndexVT);
526
528 "Element count mismatch");
529
530 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
531 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
532 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
533 report_fatal_error("The V extension does not support EEW=64 for index "
534 "values when XLEN=32");
535 }
536 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
537 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
538 static_cast<unsigned>(IndexLMUL));
539 MachineSDNode *Store =
540 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
541
542 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
543 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
544
545 ReplaceNode(Node, Store);
546}
547
548 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
549 if (!Subtarget->hasVInstructions())
550 return;
551
552 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
553
554 SDLoc DL(Node);
555 MVT XLenVT = Subtarget->getXLenVT();
556
557 unsigned IntNo = Node->getConstantOperandVal(0);
558
559 assert((IntNo == Intrinsic::riscv_vsetvli ||
560 IntNo == Intrinsic::riscv_vsetvlimax) &&
561 "Unexpected vsetvli intrinsic");
562
563 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
564 unsigned Offset = (VLMax ? 1 : 2);
565
566 assert(Node->getNumOperands() == Offset + 2 &&
567 "Unexpected number of operands");
568
569 unsigned SEW =
570 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
571 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
572 Node->getConstantOperandVal(Offset + 1) & 0x7);
573
574 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
575 /*MaskAgnostic*/ true);
576 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
577
578 SDValue VLOperand;
579 unsigned Opcode = RISCV::PseudoVSETVLI;
580 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
581 if (auto VLEN = Subtarget->getRealVLen())
582 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
583 VLMax = true;
584 }
585 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
586 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
587 Opcode = RISCV::PseudoVSETVLIX0;
588 } else {
589 VLOperand = Node->getOperand(1);
590
591 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
592 uint64_t AVL = C->getZExtValue();
593 if (isUInt<5>(AVL)) {
594 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
595 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
596 XLenVT, VLImm, VTypeIOp));
597 return;
598 }
599 }
600 }
601
602 ReplaceNode(Node,
603 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
604}
605
606 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
607 MVT VT = Node->getSimpleValueType(0);
608 unsigned Opcode = Node->getOpcode();
609 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
610 "Unexpected opcode");
611 SDLoc DL(Node);
612
613 // For operations of the form (x << C1) op C2, check if we can use
614 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
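// Worked example (editorial): (or (shl x, 16), 0x7d0000) cannot use ORI
// directly because 0x7d0000 does not fit in 12 bits, but rewriting it as
// (shl (or x, 0x7d), 16) lets the OR use ORI with the small immediate 0x7d.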
615 SDValue N0 = Node->getOperand(0);
616 SDValue N1 = Node->getOperand(1);
617
618 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
619 if (!Cst)
620 return false;
621
622 int64_t Val = Cst->getSExtValue();
623
624 // Check if immediate can already use ANDI/ORI/XORI.
625 if (isInt<12>(Val))
626 return false;
627
628 SDValue Shift = N0;
629
630 // If Val is simm32 and we have a sext_inreg from i32, then the binop
631 // produces at least 33 sign bits. We can peek through the sext_inreg and use
632 // a SLLIW at the end.
633 bool SignExt = false;
634 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
635 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
636 SignExt = true;
637 Shift = N0.getOperand(0);
638 }
639
640 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
641 return false;
642
643 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
644 if (!ShlCst)
645 return false;
646
647 uint64_t ShAmt = ShlCst->getZExtValue();
648
649 // Make sure that we don't change the operation by removing bits.
650 // This only matters for OR and XOR, AND is unaffected.
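// Editorial example: for (or (shl x, 4), 0x7d005) the low bits 0x5 are not
// produced by the shift, so hoisting the OR above the shift would drop them;
// for AND those low result bits are zero either way, so the check is skipped.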
651 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
652 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
653 return false;
654
655 int64_t ShiftedVal = Val >> ShAmt;
656 if (!isInt<12>(ShiftedVal))
657 return false;
658
659 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
660 if (SignExt && ShAmt >= 32)
661 return false;
662
663 // Ok, we can reorder to get a smaller immediate.
664 unsigned BinOpc;
665 switch (Opcode) {
666 default: llvm_unreachable("Unexpected opcode");
667 case ISD::AND: BinOpc = RISCV::ANDI; break;
668 case ISD::OR: BinOpc = RISCV::ORI; break;
669 case ISD::XOR: BinOpc = RISCV::XORI; break;
670 }
671
672 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
673
674 SDNode *BinOp = CurDAG->getMachineNode(
675 BinOpc, DL, VT, Shift.getOperand(0),
676 CurDAG->getSignedConstant(ShiftedVal, DL, VT, /*isTarget=*/true));
677 SDNode *SLLI =
678 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
679 CurDAG->getTargetConstant(ShAmt, DL, VT));
680 ReplaceNode(Node, SLLI);
681 return true;
682}
683
684 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
685 // Only supported with XTHeadBb at the moment.
686 if (!Subtarget->hasVendorXTHeadBb())
687 return false;
688
689 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
690 if (!N1C)
691 return false;
692
693 SDValue N0 = Node->getOperand(0);
694 if (!N0.hasOneUse())
695 return false;
696
697 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
698 MVT VT) {
699 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
700 CurDAG->getTargetConstant(Msb, DL, VT),
701 CurDAG->getTargetConstant(Lsb, DL, VT));
702 };
703
704 SDLoc DL(Node);
705 MVT VT = Node->getSimpleValueType(0);
706 const unsigned RightShAmt = N1C->getZExtValue();
707
708 // Transform (sra (shl X, C1) C2) with C1 < C2
709 // -> (TH.EXT X, msb, lsb)
710 if (N0.getOpcode() == ISD::SHL) {
711 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
712 if (!N01C)
713 return false;
714
715 const unsigned LeftShAmt = N01C->getZExtValue();
716 // Make sure that this is a bitfield extraction (i.e., the shift-right
717 // amount can not be less than the left-shift).
718 if (LeftShAmt > RightShAmt)
719 return false;
720
721 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
722 const unsigned Msb = MsbPlusOne - 1;
723 const unsigned Lsb = RightShAmt - LeftShAmt;
724
725 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
726 ReplaceNode(Node, TH_EXT);
727 return true;
728 }
729
730 // Transform (sra (sext_inreg X, _), C) ->
731 // (TH.EXT X, msb, lsb)
732 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
733 unsigned ExtSize =
734 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
735
736 // ExtSize of 32 should use sraiw via tablegen pattern.
737 if (ExtSize == 32)
738 return false;
739
740 const unsigned Msb = ExtSize - 1;
741 const unsigned Lsb = RightShAmt;
742
743 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
744 ReplaceNode(Node, TH_EXT);
745 return true;
746 }
747
748 return false;
749}
750
751 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
752 // Target does not support indexed loads.
753 if (!Subtarget->hasVendorXTHeadMemIdx())
754 return false;
755
756 LoadSDNode *Ld = cast<LoadSDNode>(Node);
757 ISD::MemIndexedMode AM = Ld->getAddressingMode();
758 if (AM == ISD::UNINDEXED)
759 return false;
760
761 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
762 if (!C)
763 return false;
764
765 EVT LoadVT = Ld->getMemoryVT();
766 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
767 "Unexpected addressing mode");
768 bool IsPre = AM == ISD::PRE_INC;
769 bool IsPost = AM == ISD::POST_INC;
770 int64_t Offset = C->getSExtValue();
771
772 // The constants that can be encoded in the THeadMemIdx instructions
773 // are of the form (sign_extend(imm5) << imm2).
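// Editorial note: the loop below finds the smallest imm2 in [0,3] such that
// Offset == simm5 << imm2; e.g. an offset of 48 encodes as 12 << 2, while 17
// or 128 cannot be encoded and fall back to the generic (non-indexed) path.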
774 int64_t Shift;
775 for (Shift = 0; Shift < 4; Shift++)
776 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
777 break;
778
779 // Constant cannot be encoded.
780 if (Shift == 4)
781 return false;
782
783 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
784 unsigned Opcode;
785 if (LoadVT == MVT::i8 && IsPre)
786 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
787 else if (LoadVT == MVT::i8 && IsPost)
788 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
789 else if (LoadVT == MVT::i16 && IsPre)
790 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
791 else if (LoadVT == MVT::i16 && IsPost)
792 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
793 else if (LoadVT == MVT::i32 && IsPre)
794 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
795 else if (LoadVT == MVT::i32 && IsPost)
796 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
797 else if (LoadVT == MVT::i64 && IsPre)
798 Opcode = RISCV::TH_LDIB;
799 else if (LoadVT == MVT::i64 && IsPost)
800 Opcode = RISCV::TH_LDIA;
801 else
802 return false;
803
804 EVT Ty = Ld->getOffset().getValueType();
805 SDValue Ops[] = {Ld->getBasePtr(),
806 CurDAG->getSignedConstant(Offset >> Shift, SDLoc(Node), Ty,
807 /*isTarget=*/true),
808 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
809 Ld->getChain()};
810 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
811 Ld->getValueType(1), MVT::Other, Ops);
812
813 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
814 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
815
816 ReplaceNode(Node, New);
817
818 return true;
819}
820
821 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
822 if (!Subtarget->hasVInstructions())
823 return;
824
825 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
826
827 SDLoc DL(Node);
828 unsigned IntNo = Node->getConstantOperandVal(1);
829
830 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
831 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
832 "Unexpected vsetvli intrinsic");
833
834 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
835 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
836 SDValue SEWOp =
837 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
838 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
839 Node->getOperand(4), Node->getOperand(5),
840 Node->getOperand(8), SEWOp,
841 Node->getOperand(0)};
842
843 unsigned Opcode;
844 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
845 switch (LMulSDNode->getSExtValue()) {
846 case 5:
847 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
848 : RISCV::PseudoVC_I_SE_MF8;
849 break;
850 case 6:
851 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
852 : RISCV::PseudoVC_I_SE_MF4;
853 break;
854 case 7:
855 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
856 : RISCV::PseudoVC_I_SE_MF2;
857 break;
858 case 0:
859 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
860 : RISCV::PseudoVC_I_SE_M1;
861 break;
862 case 1:
863 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
864 : RISCV::PseudoVC_I_SE_M2;
865 break;
866 case 2:
867 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
868 : RISCV::PseudoVC_I_SE_M4;
869 break;
870 case 3:
871 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
872 : RISCV::PseudoVC_I_SE_M8;
873 break;
874 }
875
876 ReplaceNode(Node, CurDAG->getMachineNode(
877 Opcode, DL, Node->getSimpleValueType(0), Operands));
878}
879
880 void RISCVDAGToDAGISel::Select(SDNode *Node) {
881 // If we have a custom node, we have already selected.
882 if (Node->isMachineOpcode()) {
883 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
884 Node->setNodeId(-1);
885 return;
886 }
887
888 // Instruction Selection not handled by the auto-generated tablegen selection
889 // should be handled here.
890 unsigned Opcode = Node->getOpcode();
891 MVT XLenVT = Subtarget->getXLenVT();
892 SDLoc DL(Node);
893 MVT VT = Node->getSimpleValueType(0);
894
895 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
896
897 switch (Opcode) {
898 case ISD::Constant: {
899 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
900 auto *ConstNode = cast<ConstantSDNode>(Node);
901 if (ConstNode->isZero()) {
902 SDValue New =
903 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
904 ReplaceNode(Node, New.getNode());
905 return;
906 }
907 int64_t Imm = ConstNode->getSExtValue();
908 // If only the lower 8 bits are used, try to convert this to a simm6 by
909 // sign-extending bit 7. This is neutral without the C extension, and
910 // allows C.LI to be used if C is present.
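// Editorial example: Imm = 0xF0 with only byte-sized users becomes -16
// (SignExtend64<8>(0xF0)), which fits simm6 and so can be materialized with
// a compressible C.LI instead of a wider constant sequence.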
911 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
912 Imm = SignExtend64<8>(Imm);
913 // If the upper XLen-16 bits are not used, try to convert this to a simm12
914 // by sign extending bit 15.
915 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
916 hasAllHUsers(Node))
917 Imm = SignExtend64<16>(Imm);
918 // If the upper 32-bits are not used try to convert this into a simm32 by
919 // sign extending bit 32.
920 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
921 Imm = SignExtend64<32>(Imm);
922
923 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
924 return;
925 }
926 case ISD::ConstantFP: {
927 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
928 auto [FPImm, NeedsFNeg] =
929 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
930 VT);
931 if (FPImm >= 0) {
932 unsigned Opc;
933 unsigned FNegOpc;
934 switch (VT.SimpleTy) {
935 default:
936 llvm_unreachable("Unexpected size");
937 case MVT::f16:
938 Opc = RISCV::FLI_H;
939 FNegOpc = RISCV::FSGNJN_H;
940 break;
941 case MVT::f32:
942 Opc = RISCV::FLI_S;
943 FNegOpc = RISCV::FSGNJN_S;
944 break;
945 case MVT::f64:
946 Opc = RISCV::FLI_D;
947 FNegOpc = RISCV::FSGNJN_D;
948 break;
949 }
950 SDNode *Res = CurDAG->getMachineNode(
951 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
952 if (NeedsFNeg)
953 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
954 SDValue(Res, 0));
955
956 ReplaceNode(Node, Res);
957 return;
958 }
959
960 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
961 SDValue Imm;
962 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
963 // create an integer immediate.
964 if (APF.isPosZero() || NegZeroF64)
965 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
966 else
967 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
968 *Subtarget);
969
970 bool HasZdinx = Subtarget->hasStdExtZdinx();
971 bool Is64Bit = Subtarget->is64Bit();
972 unsigned Opc;
973 switch (VT.SimpleTy) {
974 default:
975 llvm_unreachable("Unexpected size");
976 case MVT::bf16:
977 assert(Subtarget->hasStdExtZfbfmin());
978 Opc = RISCV::FMV_H_X;
979 break;
980 case MVT::f16:
981 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
982 break;
983 case MVT::f32:
984 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
985 break;
986 case MVT::f64:
987 // For RV32, we can't move from a GPR, we need to convert instead. This
988 // should only happen for +0.0 and -0.0.
989 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
990 if (Is64Bit)
991 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
992 else
993 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
994 break;
995 }
996
997 SDNode *Res;
998 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
999 Res = CurDAG->getMachineNode(
1000 Opc, DL, VT, Imm,
1001 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1002 else
1003 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1004
1005 // For f64 -0.0, we need to insert a fneg.d idiom.
1006 if (NegZeroF64) {
1007 Opc = RISCV::FSGNJN_D;
1008 if (HasZdinx)
1009 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1010 Res =
1011 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1012 }
1013
1014 ReplaceNode(Node, Res);
1015 return;
1016 }
1017 case RISCVISD::BuildPairF64: {
1018 if (!Subtarget->hasStdExtZdinx())
1019 break;
1020
1021 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1022
1023 SDValue Ops[] = {
1024 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1025 Node->getOperand(0),
1026 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1027 Node->getOperand(1),
1028 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1029
1030 SDNode *N =
1031 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1032 ReplaceNode(Node, N);
1033 return;
1034 }
1035 case RISCVISD::SplitF64: {
1036 if (Subtarget->hasStdExtZdinx()) {
1037 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1038
1039 if (!SDValue(Node, 0).use_empty()) {
1040 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1041 Node->getOperand(0));
1042 ReplaceUses(SDValue(Node, 0), Lo);
1043 }
1044
1045 if (!SDValue(Node, 1).use_empty()) {
1046 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1047 Node->getOperand(0));
1048 ReplaceUses(SDValue(Node, 1), Hi);
1049 }
1050
1051 CurDAG->RemoveDeadNode(Node);
1052 return;
1053 }
1054
1055 if (!Subtarget->hasStdExtZfa())
1056 break;
1057 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1058 "Unexpected subtarget");
1059
1060 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1061 if (!SDValue(Node, 0).use_empty()) {
1062 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1063 Node->getOperand(0));
1064 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1065 }
1066 if (!SDValue(Node, 1).use_empty()) {
1067 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1068 Node->getOperand(0));
1069 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1070 }
1071
1072 CurDAG->RemoveDeadNode(Node);
1073 return;
1074 }
1075 case ISD::SHL: {
1076 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1077 if (!N1C)
1078 break;
1079 SDValue N0 = Node->getOperand(0);
1080 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1081 !isa<ConstantSDNode>(N0.getOperand(1)))
1082 break;
1083 unsigned ShAmt = N1C->getZExtValue();
1084 uint64_t Mask = N0.getConstantOperandVal(1);
1085
1086 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1087 // 32 leading zeros and C3 trailing zeros.
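// Editorial example: with C2 = 0xFFFFFF00 (32 leading zeros, 8 trailing
// zeros) and C = 2, (shl (and X, 0xFFFFFF00), 2) becomes
// (slli (srliw X, 8), 10), so the mask constant never has to be materialized.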
1088 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1089 unsigned XLen = Subtarget->getXLen();
1090 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1091 unsigned TrailingZeros = llvm::countr_zero(Mask);
1092 if (TrailingZeros > 0 && LeadingZeros == 32) {
1093 SDNode *SRLIW = CurDAG->getMachineNode(
1094 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1095 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1096 SDNode *SLLI = CurDAG->getMachineNode(
1097 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1098 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1099 ReplaceNode(Node, SLLI);
1100 return;
1101 }
1102 }
1103 break;
1104 }
1105 case ISD::SRL: {
1106 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1107 if (!N1C)
1108 break;
1109 SDValue N0 = Node->getOperand(0);
1110 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1111 break;
1112 unsigned ShAmt = N1C->getZExtValue();
1113 uint64_t Mask = N0.getConstantOperandVal(1);
1114
1115 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1116 // 32 leading zeros and C3 trailing zeros.
1117 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1118 unsigned XLen = Subtarget->getXLen();
1119 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1120 unsigned TrailingZeros = llvm::countr_zero(Mask);
1121 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1122 SDNode *SRLIW = CurDAG->getMachineNode(
1123 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1124 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1125 SDNode *SLLI = CurDAG->getMachineNode(
1126 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1127 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1128 ReplaceNode(Node, SLLI);
1129 return;
1130 }
1131 }
1132
1133 // Optimize (srl (and X, C2), C) ->
1134 // (srli (slli X, XLen-C3), (XLen-C3) + C)
1135 // Where C2 is a mask with C3 trailing ones.
1136 // Taking into account that the C2 may have had lower bits unset by
1137 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1138 // This pattern occurs when type legalizing right shifts for types with
1139 // less than XLen bits.
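// Editorial example: on a plain RV64 core (no Zbs/XTHeadBb special cases
// below), (srl (and X, 0xFFFF), 4) becomes (srli (slli X, 48), 52); the slli
// clears everything above the 16-bit field and the srli applies the shift
// by 4, again without materializing the mask.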
1140 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1141 if (!isMask_64(Mask))
1142 break;
1143 unsigned TrailingOnes = llvm::countr_one(Mask);
1144 if (ShAmt >= TrailingOnes)
1145 break;
1146 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1147 if (TrailingOnes == 32) {
1148 SDNode *SRLI = CurDAG->getMachineNode(
1149 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1150 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1151 ReplaceNode(Node, SRLI);
1152 return;
1153 }
1154
1155 // Only do the remaining transforms if the AND has one use.
1156 if (!N0.hasOneUse())
1157 break;
1158
1159 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1160 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1161 SDNode *BEXTI = CurDAG->getMachineNode(
1162 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1163 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1164 ReplaceNode(Node, BEXTI);
1165 return;
1166 }
1167
1168 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1169 if (Subtarget->hasVendorXTHeadBb()) {
1170 SDNode *THEXTU = CurDAG->getMachineNode(
1171 RISCV::TH_EXTU, DL, VT, N0->getOperand(0),
1172 CurDAG->getTargetConstant(TrailingOnes - 1, DL, VT),
1173 CurDAG->getTargetConstant(ShAmt, DL, VT));
1174 ReplaceNode(Node, THEXTU);
1175 return;
1176 }
1177
1178 SDNode *SLLI =
1179 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1180 CurDAG->getTargetConstant(LShAmt, DL, VT));
1181 SDNode *SRLI = CurDAG->getMachineNode(
1182 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1183 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1184 ReplaceNode(Node, SRLI);
1185 return;
1186 }
1187 case ISD::SRA: {
1188 if (trySignedBitfieldExtract(Node))
1189 return;
1190
1191 // Optimize (sra (sext_inreg X, i16), C) ->
1192 // (srai (slli X, XLen-16), (XLen-16) + C)
1193 // And (sra (sext_inreg X, i8), C) ->
1194 // (srai (slli X, XLen-8), (XLen-8) + C)
1195 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1196 // This transform matches the code we get without Zbb. The shifts are more
1197 // compressible, and this can help expose CSE opportunities in the sdiv by
1198 // constant optimization.
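// Editorial example: on RV64 with Zbb, (sra (sext_inreg X, i8), 3) is
// selected as (srai (slli X, 56), 59): the slli places the byte at the top
// and the arithmetic shift both sign-extends it and applies the shift by 3.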
1199 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1200 if (!N1C)
1201 break;
1202 SDValue N0 = Node->getOperand(0);
1203 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1204 break;
1205 unsigned ShAmt = N1C->getZExtValue();
1206 unsigned ExtSize =
1207 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1208 // ExtSize of 32 should use sraiw via tablegen pattern.
1209 if (ExtSize >= 32 || ShAmt >= ExtSize)
1210 break;
1211 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1212 SDNode *SLLI =
1213 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1214 CurDAG->getTargetConstant(LShAmt, DL, VT));
1215 SDNode *SRAI = CurDAG->getMachineNode(
1216 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1217 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1218 ReplaceNode(Node, SRAI);
1219 return;
1220 }
1221 case ISD::OR:
1222 case ISD::XOR:
1223 if (tryShrinkShlLogicImm(Node))
1224 return;
1225
1226 break;
1227 case ISD::AND: {
1228 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1229 if (!N1C)
1230 break;
1231
1232 SDValue N0 = Node->getOperand(0);
1233
1234 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1235 SDValue X, unsigned Msb,
1236 unsigned Lsb) {
1237 if (!Subtarget->hasVendorXTHeadBb())
1238 return false;
1239
1240 SDNode *TH_EXTU = CurDAG->getMachineNode(
1241 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1242 CurDAG->getTargetConstant(Lsb, DL, VT));
1243 ReplaceNode(Node, TH_EXTU);
1244 return true;
1245 };
1246
1247 bool LeftShift = N0.getOpcode() == ISD::SHL;
1248 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1249 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1250 if (!C)
1251 break;
1252 unsigned C2 = C->getZExtValue();
1253 unsigned XLen = Subtarget->getXLen();
1254 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1255
1256 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1257 // shift pair might offer more compression opportunities.
1258 // TODO: We could check for C extension here, but we don't have many lit
1259 // tests with the C extension enabled so not checking gets better
1260 // coverage.
1261 // TODO: What if ANDI faster than shift?
1262 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1263
1264 uint64_t C1 = N1C->getZExtValue();
1265
1266 // Clear irrelevant bits in the mask.
1267 if (LeftShift)
1268 C1 &= maskTrailingZeros<uint64_t>(C2);
1269 else
1270 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1271
1272 // Some transforms should only be done if the shift has a single use or
1273 // the AND would become (srli (slli X, 32), 32)
1274 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1275
1276 SDValue X = N0.getOperand(0);
1277
1278 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1279 // with c3 leading zeros.
1280 if (!LeftShift && isMask_64(C1)) {
1281 unsigned Leading = XLen - llvm::bit_width(C1);
1282 if (C2 < Leading) {
1283 // If the number of leading zeros is C2+32 this can be SRLIW.
1284 if (C2 + 32 == Leading) {
1285 SDNode *SRLIW = CurDAG->getMachineNode(
1286 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1287 ReplaceNode(Node, SRLIW);
1288 return;
1289 }
1290
1291 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1292 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1293 //
1294 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1295 // legalized and goes through DAG combine.
1296 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1297 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1298 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1299 SDNode *SRAIW =
1300 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1301 CurDAG->getTargetConstant(31, DL, VT));
1302 SDNode *SRLIW = CurDAG->getMachineNode(
1303 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1304 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1305 ReplaceNode(Node, SRLIW);
1306 return;
1307 }
1308
1309 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1310 // available.
1311 // Transform (and (srl x, C2), C1)
1312 // -> (<bfextract> x, msb, lsb)
1313 //
1314 // Make sure to keep this below the SRLIW cases, as we always want to
1315 // prefer the more common instruction.
1316 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1317 const unsigned Lsb = C2;
1318 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1319 return;
1320
1321 // (srli (slli x, c3-c2), c3).
1322 // Skip if we could use (zext.w (sraiw X, C2)).
1323 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1324 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1325 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1326 // Also Skip if we can use bexti or th.tst.
1327 Skip |= HasBitTest && Leading == XLen - 1;
1328 if (OneUseOrZExtW && !Skip) {
1329 SDNode *SLLI = CurDAG->getMachineNode(
1330 RISCV::SLLI, DL, VT, X,
1331 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1332 SDNode *SRLI = CurDAG->getMachineNode(
1333 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1334 CurDAG->getTargetConstant(Leading, DL, VT));
1335 ReplaceNode(Node, SRLI);
1336 return;
1337 }
1338 }
1339 }
1340
1341 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1342 // shifted by c2 bits with c3 leading zeros.
1343 if (LeftShift && isShiftedMask_64(C1)) {
1344 unsigned Leading = XLen - llvm::bit_width(C1);
1345
1346 if (C2 + Leading < XLen &&
1347 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1348 // Use slli.uw when possible.
1349 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1350 SDNode *SLLI_UW =
1351 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1352 CurDAG->getTargetConstant(C2, DL, VT));
1353 ReplaceNode(Node, SLLI_UW);
1354 return;
1355 }
1356
1357 // (srli (slli c2+c3), c3)
1358 if (OneUseOrZExtW && !IsCANDI) {
1359 SDNode *SLLI = CurDAG->getMachineNode(
1360 RISCV::SLLI, DL, VT, X,
1361 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1362 SDNode *SRLI = CurDAG->getMachineNode(
1363 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1364 CurDAG->getTargetConstant(Leading, DL, VT));
1365 ReplaceNode(Node, SRLI);
1366 return;
1367 }
1368 }
1369 }
1370
1371 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1372 // shifted mask with c2 leading zeros and c3 trailing zeros.
1373 if (!LeftShift && isShiftedMask_64(C1)) {
1374 unsigned Leading = XLen - llvm::bit_width(C1);
1375 unsigned Trailing = llvm::countr_zero(C1);
1376 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1377 !IsCANDI) {
1378 unsigned SrliOpc = RISCV::SRLI;
1379 // If the input is zexti32 we should use SRLIW.
1380 if (X.getOpcode() == ISD::AND &&
1381 isa<ConstantSDNode>(X.getOperand(1)) &&
1382 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1383 SrliOpc = RISCV::SRLIW;
1384 X = X.getOperand(0);
1385 }
1386 SDNode *SRLI = CurDAG->getMachineNode(
1387 SrliOpc, DL, VT, X,
1388 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1389 SDNode *SLLI = CurDAG->getMachineNode(
1390 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1391 CurDAG->getTargetConstant(Trailing, DL, VT));
1392 ReplaceNode(Node, SLLI);
1393 return;
1394 }
1395 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1396 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1397 OneUseOrZExtW && !IsCANDI) {
1398 SDNode *SRLIW = CurDAG->getMachineNode(
1399 RISCV::SRLIW, DL, VT, X,
1400 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1401 SDNode *SLLI = CurDAG->getMachineNode(
1402 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1403 CurDAG->getTargetConstant(Trailing, DL, VT));
1404 ReplaceNode(Node, SLLI);
1405 return;
1406 }
1407 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1408 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1409 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1410 SDNode *SRLI = CurDAG->getMachineNode(
1411 RISCV::SRLI, DL, VT, X,
1412 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1413 SDNode *SLLI_UW = CurDAG->getMachineNode(
1414 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1415 CurDAG->getTargetConstant(Trailing, DL, VT));
1416 ReplaceNode(Node, SLLI_UW);
1417 return;
1418 }
1419 }
1420
1421 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1422 // shifted mask with no leading zeros and c3 trailing zeros.
1423 if (LeftShift && isShiftedMask_64(C1)) {
1424 unsigned Leading = XLen - llvm::bit_width(C1);
1425 unsigned Trailing = llvm::countr_zero(C1);
1426 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1427 SDNode *SRLI = CurDAG->getMachineNode(
1428 RISCV::SRLI, DL, VT, X,
1429 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1430 SDNode *SLLI = CurDAG->getMachineNode(
1431 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1432 CurDAG->getTargetConstant(Trailing, DL, VT));
1433 ReplaceNode(Node, SLLI);
1434 return;
1435 }
1436 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1437 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1438 SDNode *SRLIW = CurDAG->getMachineNode(
1439 RISCV::SRLIW, DL, VT, X,
1440 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1441 SDNode *SLLI = CurDAG->getMachineNode(
1442 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1443 CurDAG->getTargetConstant(Trailing, DL, VT));
1444 ReplaceNode(Node, SLLI);
1445 return;
1446 }
1447
1448 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1449 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1450 Subtarget->hasStdExtZba()) {
1451 SDNode *SRLI = CurDAG->getMachineNode(
1452 RISCV::SRLI, DL, VT, X,
1453 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1454 SDNode *SLLI_UW = CurDAG->getMachineNode(
1455 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1456 CurDAG->getTargetConstant(Trailing, DL, VT));
1457 ReplaceNode(Node, SLLI_UW);
1458 return;
1459 }
1460 }
1461 }
1462
1463 const uint64_t C1 = N1C->getZExtValue();
1464
1465 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1466 N0.hasOneUse()) {
1467 unsigned C2 = N0.getConstantOperandVal(1);
1468 unsigned XLen = Subtarget->getXLen();
1469 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1470
1471 SDValue X = N0.getOperand(0);
1472
1473 // Prefer SRAIW + ANDI when possible.
1474 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1475 X.getOpcode() == ISD::SHL &&
1476 isa<ConstantSDNode>(X.getOperand(1)) &&
1477 X.getConstantOperandVal(1) == 32;
1478 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1479 // mask with c3 leading zeros and c2 is larger than c3.
1480 if (isMask_64(C1) && !Skip) {
1481 unsigned Leading = XLen - llvm::bit_width(C1);
1482 if (C2 > Leading) {
1483 SDNode *SRAI = CurDAG->getMachineNode(
1484 RISCV::SRAI, DL, VT, X,
1485 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1486 SDNode *SRLI = CurDAG->getMachineNode(
1487 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1488 CurDAG->getTargetConstant(Leading, DL, VT));
1489 ReplaceNode(Node, SRLI);
1490 return;
1491 }
1492 }
1493
1494 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1495 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1496 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1497 if (isShiftedMask_64(C1) && !Skip) {
1498 unsigned Leading = XLen - llvm::bit_width(C1);
1499 unsigned Trailing = llvm::countr_zero(C1);
1500 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1501 SDNode *SRAI = CurDAG->getMachineNode(
1502 RISCV::SRAI, DL, VT, N0.getOperand(0),
1503 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1504 SDNode *SRLI = CurDAG->getMachineNode(
1505 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1506 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1507 SDNode *SLLI = CurDAG->getMachineNode(
1508 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1509 CurDAG->getTargetConstant(Trailing, DL, VT));
1510 ReplaceNode(Node, SLLI);
1511 return;
1512 }
1513 }
1514 }
1515
1516 // If C1 masks off the upper bits only (but can't be formed as an
1517 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1518 // available.
1519 // Transform (and x, C1)
1520 // -> (<bfextract> x, msb, lsb)
1521 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1522 const unsigned Msb = llvm::bit_width(C1) - 1;
1523 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1524 return;
1525 }
1526
1527 if (tryShrinkShlLogicImm(Node))
1528 return;
1529
1530 break;
1531 }
1532 case ISD::MUL: {
1533 // Special case for calculating (mul (and X, C2), C1) where the full product
1534 // fits in XLen bits. We can shift X left by the number of leading zeros in
1535 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1536 // product has XLen trailing zeros, putting it in the output of MULHU. This
1537 // can avoid materializing a constant in a register for C2.
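// Editorial example (RV64): for (mul (and X, 0xFFFFFF), C1), the mask has 40
// leading zeros, so X is shifted left by 40 and C1 by 24; assuming the full
// product fits in 64 bits, the 128-bit product of the shifted operands holds
// the result entirely in its upper half, which MULHU returns, and the
// 0xFFFFFF mask is never materialized.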
1538
1539 // RHS should be a constant.
1540 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1541 if (!N1C || !N1C->hasOneUse())
1542 break;
1543
1544 // LHS should be an AND with constant.
1545 SDValue N0 = Node->getOperand(0);
1546 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1547 break;
1548
1549 uint64_t C2 = N0.getConstantOperandVal(1);
1550
1551 // Constant should be a mask.
1552 if (!isMask_64(C2))
1553 break;
1554
1555 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1556 // multiple users or the constant is a simm12. This prevents inserting a
1557 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1558 // make it more costly to materialize. Otherwise, using a SLLI might allow
1559 // it to be compressed.
1560 bool IsANDIOrZExt =
1561 isInt<12>(C2) ||
1562 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1563 // With XTHeadBb, we can use TH.EXTU.
1564 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1565 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1566 break;
1567 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1568 // the constant is a simm32.
1569 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1570 // With XTHeadBb, we can use TH.EXTU.
1571 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1572 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1573 break;
1574
1575 // We need to shift left the AND input and C1 by a total of XLen bits.
1576
1577 // How far left do we need to shift the AND input?
1578 unsigned XLen = Subtarget->getXLen();
1579 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1580
1581 // The constant gets shifted by the remaining amount unless that would
1582 // shift bits out.
1583 uint64_t C1 = N1C->getZExtValue();
1584 unsigned ConstantShift = XLen - LeadingZeros;
1585 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1586 break;
1587
1588 uint64_t ShiftedC1 = C1 << ConstantShift;
1589 // If this is RV32, we need to sign extend the constant.
1590 if (XLen == 32)
1591 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1592
1593 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1594 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1595 SDNode *SLLI =
1596 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1597 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1598 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1599 SDValue(SLLI, 0), SDValue(Imm, 0));
1600 ReplaceNode(Node, MULHU);
1601 return;
1602 }
1603 case ISD::LOAD: {
1604 if (tryIndexedLoad(Node))
1605 return;
1606
1607 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1608 // We match post-incrementing load here
1609 LoadSDNode *Load = cast<LoadSDNode>(Node);
1610 if (Load->getAddressingMode() != ISD::POST_INC)
1611 break;
1612
1613 SDValue Chain = Node->getOperand(0);
1614 SDValue Base = Node->getOperand(1);
1615 SDValue Offset = Node->getOperand(2);
1616
1617 bool Simm12 = false;
1618 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1619
1620 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1621 int ConstantVal = ConstantOffset->getSExtValue();
1622 Simm12 = isInt<12>(ConstantVal);
1623 if (Simm12)
1624 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1625 Offset.getValueType());
1626 }
1627
1628 unsigned Opcode = 0;
1629 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1630 case MVT::i8:
1631 if (Simm12 && SignExtend)
1632 Opcode = RISCV::CV_LB_ri_inc;
1633 else if (Simm12 && !SignExtend)
1634 Opcode = RISCV::CV_LBU_ri_inc;
1635 else if (!Simm12 && SignExtend)
1636 Opcode = RISCV::CV_LB_rr_inc;
1637 else
1638 Opcode = RISCV::CV_LBU_rr_inc;
1639 break;
1640 case MVT::i16:
1641 if (Simm12 && SignExtend)
1642 Opcode = RISCV::CV_LH_ri_inc;
1643 else if (Simm12 && !SignExtend)
1644 Opcode = RISCV::CV_LHU_ri_inc;
1645 else if (!Simm12 && SignExtend)
1646 Opcode = RISCV::CV_LH_rr_inc;
1647 else
1648 Opcode = RISCV::CV_LHU_rr_inc;
1649 break;
1650 case MVT::i32:
1651 if (Simm12)
1652 Opcode = RISCV::CV_LW_ri_inc;
1653 else
1654 Opcode = RISCV::CV_LW_rr_inc;
1655 break;
1656 default:
1657 break;
1658 }
1659 if (!Opcode)
1660 break;
1661
1662 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1663 Chain.getSimpleValueType(), Base,
1664 Offset, Chain));
1665 return;
1666 }
1667 break;
1668 }
1669 case ISD::INTRINSIC_WO_CHAIN: {
1670 unsigned IntNo = Node->getConstantOperandVal(0);
1671 switch (IntNo) {
1672 // By default we do not custom select any intrinsic.
1673 default:
1674 break;
1675 case Intrinsic::riscv_vmsgeu:
1676 case Intrinsic::riscv_vmsge: {
1677 SDValue Src1 = Node->getOperand(1);
1678 SDValue Src2 = Node->getOperand(2);
1679 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1680 bool IsCmpUnsignedZero = false;
1681 // Only custom select scalar second operand.
1682 if (Src2.getValueType() != XLenVT)
1683 break;
1684 // Small constants are handled with patterns.
1685 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1686 int64_t CVal = C->getSExtValue();
1687 if (CVal >= -15 && CVal <= 16) {
1688 if (!IsUnsigned || CVal != 0)
1689 break;
1690 IsCmpUnsignedZero = true;
1691 }
1692 }
1693 MVT Src1VT = Src1.getSimpleValueType();
1694 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1695 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1696 default:
1697 llvm_unreachable("Unexpected LMUL!");
1698#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1699 case RISCVII::VLMUL::lmulenum: \
1700 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1701 : RISCV::PseudoVMSLT_VX_##suffix; \
1702 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1703 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1704 break;
1705 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1706 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1707 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1708 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1709 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1710 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1711 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1712#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1713 }
1714 SDValue SEW = CurDAG->getTargetConstant(
1715 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1716 SDValue VL;
1717 selectVLOp(Node->getOperand(3), VL);
1718
1719 // If vmsgeu with 0 immediate, expand it to vmset.
1720 if (IsCmpUnsignedZero) {
1721 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1722 return;
1723 }
1724
1725 // Expand to
1726 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1727 SDValue Cmp = SDValue(
1728 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1729 0);
1730 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1731 {Cmp, Cmp, VL, SEW}));
1732 return;
1733 }
1734 case Intrinsic::riscv_vmsgeu_mask:
1735 case Intrinsic::riscv_vmsge_mask: {
1736 SDValue Src1 = Node->getOperand(2);
1737 SDValue Src2 = Node->getOperand(3);
1738 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1739 bool IsCmpUnsignedZero = false;
1740 // Only custom select scalar second operand.
1741 if (Src2.getValueType() != XLenVT)
1742 break;
1743 // Small constants are handled with patterns.
1744 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1745 int64_t CVal = C->getSExtValue();
1746 if (CVal >= -15 && CVal <= 16) {
1747 if (!IsUnsigned || CVal != 0)
1748 break;
1749 IsCmpUnsignedZero = true;
1750 }
1751 }
1752 MVT Src1VT = Src1.getSimpleValueType();
1753 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1754 VMOROpcode;
1755 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1756 default:
1757 llvm_unreachable("Unexpected LMUL!");
1758#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1759 case RISCVII::VLMUL::lmulenum: \
1760 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1761 : RISCV::PseudoVMSLT_VX_##suffix; \
1762 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1763 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1764 break;
1765 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1766 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1767 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1768 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1769 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1770 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1771 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1772#undef CASE_VMSLT_OPCODES
1773 }
1774 // Mask operations use the LMUL from the mask type.
1775 switch (RISCVTargetLowering::getLMUL(VT)) {
1776 default:
1777 llvm_unreachable("Unexpected LMUL!");
1778#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1779 case RISCVII::VLMUL::lmulenum: \
1780 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1781 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1782 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1783 break;
1784 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1785 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1786 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1787 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1788 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1789 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1790 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1791#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1792 }
1793 SDValue SEW = CurDAG->getTargetConstant(
1794 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1795 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1796 SDValue VL;
1797 selectVLOp(Node->getOperand(5), VL);
1798 SDValue MaskedOff = Node->getOperand(1);
1799 SDValue Mask = Node->getOperand(4);
1800
1801 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1802 if (IsCmpUnsignedZero) {
1803 // We don't need vmor if the MaskedOff and the Mask are the same
1804 // value.
1805 if (Mask == MaskedOff) {
1806 ReplaceUses(Node, Mask.getNode());
1807 return;
1808 }
1809 ReplaceNode(Node,
1810 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1811 {Mask, MaskedOff, VL, MaskSEW}));
1812 return;
1813 }
1814
1815 // If the MaskedOff value and the Mask are the same value use
1816 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1817 // This avoids needing to copy v0 to vd before starting the next sequence.
1818 if (Mask == MaskedOff) {
1819 SDValue Cmp = SDValue(
1820 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1821 0);
1822 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1823 {Mask, Cmp, VL, MaskSEW}));
1824 return;
1825 }
1826
1827 // Mask needs to be copied to V0.
1828 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1829 RISCV::V0, Mask, SDValue());
1830 SDValue Glue = Chain.getValue(1);
1831 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1832
1833 // Otherwise use
1834 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1835 // The result is mask undisturbed.
1836 // We use the same instructions to emulate mask agnostic behavior, because
1837 // the agnostic result can be either undisturbed or all 1.
1838 SDValue Cmp = SDValue(
1839 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1840 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1841 0);
1842 // vmxor.mm vd, vd, v0 is used to update active value.
1843 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1844 {Cmp, Mask, VL, MaskSEW}));
1845 return;
1846 }
1847 case Intrinsic::riscv_vsetvli:
1848 case Intrinsic::riscv_vsetvlimax:
1849 return selectVSETVLI(Node);
1850 }
1851 break;
1852 }
1853 case ISD::INTRINSIC_W_CHAIN: {
1854 unsigned IntNo = Node->getConstantOperandVal(1);
1855 switch (IntNo) {
1856 // By default we do not custom select any intrinsic.
1857 default:
1858 break;
1859 case Intrinsic::riscv_vlseg2:
1860 case Intrinsic::riscv_vlseg3:
1861 case Intrinsic::riscv_vlseg4:
1862 case Intrinsic::riscv_vlseg5:
1863 case Intrinsic::riscv_vlseg6:
1864 case Intrinsic::riscv_vlseg7:
1865 case Intrinsic::riscv_vlseg8: {
1866 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1867 return;
1868 }
1869 case Intrinsic::riscv_vlseg2_mask:
1870 case Intrinsic::riscv_vlseg3_mask:
1871 case Intrinsic::riscv_vlseg4_mask:
1872 case Intrinsic::riscv_vlseg5_mask:
1873 case Intrinsic::riscv_vlseg6_mask:
1874 case Intrinsic::riscv_vlseg7_mask:
1875 case Intrinsic::riscv_vlseg8_mask: {
1876 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1877 return;
1878 }
1879 case Intrinsic::riscv_vlsseg2:
1880 case Intrinsic::riscv_vlsseg3:
1881 case Intrinsic::riscv_vlsseg4:
1882 case Intrinsic::riscv_vlsseg5:
1883 case Intrinsic::riscv_vlsseg6:
1884 case Intrinsic::riscv_vlsseg7:
1885 case Intrinsic::riscv_vlsseg8: {
1886 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1887 return;
1888 }
1889 case Intrinsic::riscv_vlsseg2_mask:
1890 case Intrinsic::riscv_vlsseg3_mask:
1891 case Intrinsic::riscv_vlsseg4_mask:
1892 case Intrinsic::riscv_vlsseg5_mask:
1893 case Intrinsic::riscv_vlsseg6_mask:
1894 case Intrinsic::riscv_vlsseg7_mask:
1895 case Intrinsic::riscv_vlsseg8_mask: {
1896 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1897 return;
1898 }
1899 case Intrinsic::riscv_vloxseg2:
1900 case Intrinsic::riscv_vloxseg3:
1901 case Intrinsic::riscv_vloxseg4:
1902 case Intrinsic::riscv_vloxseg5:
1903 case Intrinsic::riscv_vloxseg6:
1904 case Intrinsic::riscv_vloxseg7:
1905 case Intrinsic::riscv_vloxseg8:
1906 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1907 return;
1908 case Intrinsic::riscv_vluxseg2:
1909 case Intrinsic::riscv_vluxseg3:
1910 case Intrinsic::riscv_vluxseg4:
1911 case Intrinsic::riscv_vluxseg5:
1912 case Intrinsic::riscv_vluxseg6:
1913 case Intrinsic::riscv_vluxseg7:
1914 case Intrinsic::riscv_vluxseg8:
1915 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1916 return;
1917 case Intrinsic::riscv_vloxseg2_mask:
1918 case Intrinsic::riscv_vloxseg3_mask:
1919 case Intrinsic::riscv_vloxseg4_mask:
1920 case Intrinsic::riscv_vloxseg5_mask:
1921 case Intrinsic::riscv_vloxseg6_mask:
1922 case Intrinsic::riscv_vloxseg7_mask:
1923 case Intrinsic::riscv_vloxseg8_mask:
1924 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1925 return;
1926 case Intrinsic::riscv_vluxseg2_mask:
1927 case Intrinsic::riscv_vluxseg3_mask:
1928 case Intrinsic::riscv_vluxseg4_mask:
1929 case Intrinsic::riscv_vluxseg5_mask:
1930 case Intrinsic::riscv_vluxseg6_mask:
1931 case Intrinsic::riscv_vluxseg7_mask:
1932 case Intrinsic::riscv_vluxseg8_mask:
1933 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1934 return;
1935 case Intrinsic::riscv_vlseg8ff:
1936 case Intrinsic::riscv_vlseg7ff:
1937 case Intrinsic::riscv_vlseg6ff:
1938 case Intrinsic::riscv_vlseg5ff:
1939 case Intrinsic::riscv_vlseg4ff:
1940 case Intrinsic::riscv_vlseg3ff:
1941 case Intrinsic::riscv_vlseg2ff: {
1942 selectVLSEGFF(Node, /*IsMasked*/ false);
1943 return;
1944 }
1945 case Intrinsic::riscv_vlseg8ff_mask:
1946 case Intrinsic::riscv_vlseg7ff_mask:
1947 case Intrinsic::riscv_vlseg6ff_mask:
1948 case Intrinsic::riscv_vlseg5ff_mask:
1949 case Intrinsic::riscv_vlseg4ff_mask:
1950 case Intrinsic::riscv_vlseg3ff_mask:
1951 case Intrinsic::riscv_vlseg2ff_mask: {
1952 selectVLSEGFF(Node, /*IsMasked*/ true);
1953 return;
1954 }
1955 case Intrinsic::riscv_vloxei:
1956 case Intrinsic::riscv_vloxei_mask:
1957 case Intrinsic::riscv_vluxei:
1958 case Intrinsic::riscv_vluxei_mask: {
1959 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1960 IntNo == Intrinsic::riscv_vluxei_mask;
1961 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1962 IntNo == Intrinsic::riscv_vloxei_mask;
1963
1964 MVT VT = Node->getSimpleValueType(0);
1965 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1966
1967 unsigned CurOp = 2;
1968 SmallVector<SDValue, 8> Operands;
1969 Operands.push_back(Node->getOperand(CurOp++));
1970
1971 MVT IndexVT;
1972 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1973 /*IsStridedOrIndexed*/ true, Operands,
1974 /*IsLoad=*/true, &IndexVT);
1975
1977 "Element count mismatch");
1978
1979 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1980 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1981 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1982 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1983 report_fatal_error("The V extension does not support EEW=64 for index "
1984 "values when XLEN=32");
1985 }
1986 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1987 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1988 static_cast<unsigned>(IndexLMUL));
1989 MachineSDNode *Load =
1990 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1991
1992 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1993 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1994
1995 ReplaceNode(Node, Load);
1996 return;
1997 }
1998 case Intrinsic::riscv_vlm:
1999 case Intrinsic::riscv_vle:
2000 case Intrinsic::riscv_vle_mask:
2001 case Intrinsic::riscv_vlse:
2002 case Intrinsic::riscv_vlse_mask: {
2003 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2004 IntNo == Intrinsic::riscv_vlse_mask;
2005 bool IsStrided =
2006 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2007
2008 MVT VT = Node->getSimpleValueType(0);
2009 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2010
2011 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2012 // operand at the IR level. In pseudos, it has both a policy and a
2013 // passthru operand. The passthru operand is needed to track the
2014 // "tail undefined" state, and the policy is there just for
2015 // consistency - it will always be "don't care" for the
2016 // unmasked form.
2017 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2018 unsigned CurOp = 2;
2019 SmallVector<SDValue, 8> Operands;
2020 if (HasPassthruOperand)
2021 Operands.push_back(Node->getOperand(CurOp++));
2022 else {
2023 // We eagerly lower to implicit_def (instead of undef), as we
2024 // otherwise fail to select nodes such as: nxv1i1 = undef
2025 SDNode *Passthru =
2026 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2027 Operands.push_back(SDValue(Passthru, 0));
2028 }
2029 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2030 Operands, /*IsLoad=*/true);
2031
2032 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2033 const RISCV::VLEPseudo *P =
2034 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2035 static_cast<unsigned>(LMUL));
2036 MachineSDNode *Load =
2037 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2038
2039 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2040 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2041
2042 ReplaceNode(Node, Load);
2043 return;
2044 }
2045 case Intrinsic::riscv_vleff:
2046 case Intrinsic::riscv_vleff_mask: {
2047 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2048
2049 MVT VT = Node->getSimpleValueType(0);
2050 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2051
2052 unsigned CurOp = 2;
2053 SmallVector<SDValue, 7> Operands;
2054 Operands.push_back(Node->getOperand(CurOp++));
2055 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2056 /*IsStridedOrIndexed*/ false, Operands,
2057 /*IsLoad=*/true);
2058
2059 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2060 const RISCV::VLEPseudo *P =
2061 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2062 Log2SEW, static_cast<unsigned>(LMUL));
2063 MachineSDNode *Load = CurDAG->getMachineNode(
2064 P->Pseudo, DL, Node->getVTList(), Operands);
2065 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2066 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2067
2068 ReplaceNode(Node, Load);
2069 return;
2070 }
2071 }
2072 break;
2073 }
2074 case ISD::INTRINSIC_VOID: {
2075 unsigned IntNo = Node->getConstantOperandVal(1);
2076 switch (IntNo) {
2077 case Intrinsic::riscv_vsseg2:
2078 case Intrinsic::riscv_vsseg3:
2079 case Intrinsic::riscv_vsseg4:
2080 case Intrinsic::riscv_vsseg5:
2081 case Intrinsic::riscv_vsseg6:
2082 case Intrinsic::riscv_vsseg7:
2083 case Intrinsic::riscv_vsseg8: {
2084 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
2085 return;
2086 }
2087 case Intrinsic::riscv_vsseg2_mask:
2088 case Intrinsic::riscv_vsseg3_mask:
2089 case Intrinsic::riscv_vsseg4_mask:
2090 case Intrinsic::riscv_vsseg5_mask:
2091 case Intrinsic::riscv_vsseg6_mask:
2092 case Intrinsic::riscv_vsseg7_mask:
2093 case Intrinsic::riscv_vsseg8_mask: {
2094 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
2095 return;
2096 }
2097 case Intrinsic::riscv_vssseg2:
2098 case Intrinsic::riscv_vssseg3:
2099 case Intrinsic::riscv_vssseg4:
2100 case Intrinsic::riscv_vssseg5:
2101 case Intrinsic::riscv_vssseg6:
2102 case Intrinsic::riscv_vssseg7:
2103 case Intrinsic::riscv_vssseg8: {
2104 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
2105 return;
2106 }
2107 case Intrinsic::riscv_vssseg2_mask:
2108 case Intrinsic::riscv_vssseg3_mask:
2109 case Intrinsic::riscv_vssseg4_mask:
2110 case Intrinsic::riscv_vssseg5_mask:
2111 case Intrinsic::riscv_vssseg6_mask:
2112 case Intrinsic::riscv_vssseg7_mask:
2113 case Intrinsic::riscv_vssseg8_mask: {
2114 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
2115 return;
2116 }
2117 case Intrinsic::riscv_vsoxseg2:
2118 case Intrinsic::riscv_vsoxseg3:
2119 case Intrinsic::riscv_vsoxseg4:
2120 case Intrinsic::riscv_vsoxseg5:
2121 case Intrinsic::riscv_vsoxseg6:
2122 case Intrinsic::riscv_vsoxseg7:
2123 case Intrinsic::riscv_vsoxseg8:
2124 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
2125 return;
2126 case Intrinsic::riscv_vsuxseg2:
2127 case Intrinsic::riscv_vsuxseg3:
2128 case Intrinsic::riscv_vsuxseg4:
2129 case Intrinsic::riscv_vsuxseg5:
2130 case Intrinsic::riscv_vsuxseg6:
2131 case Intrinsic::riscv_vsuxseg7:
2132 case Intrinsic::riscv_vsuxseg8:
2133 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
2134 return;
2135 case Intrinsic::riscv_vsoxseg2_mask:
2136 case Intrinsic::riscv_vsoxseg3_mask:
2137 case Intrinsic::riscv_vsoxseg4_mask:
2138 case Intrinsic::riscv_vsoxseg5_mask:
2139 case Intrinsic::riscv_vsoxseg6_mask:
2140 case Intrinsic::riscv_vsoxseg7_mask:
2141 case Intrinsic::riscv_vsoxseg8_mask:
2142 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
2143 return;
2144 case Intrinsic::riscv_vsuxseg2_mask:
2145 case Intrinsic::riscv_vsuxseg3_mask:
2146 case Intrinsic::riscv_vsuxseg4_mask:
2147 case Intrinsic::riscv_vsuxseg5_mask:
2148 case Intrinsic::riscv_vsuxseg6_mask:
2149 case Intrinsic::riscv_vsuxseg7_mask:
2150 case Intrinsic::riscv_vsuxseg8_mask:
2151 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
2152 return;
2153 case Intrinsic::riscv_vsoxei:
2154 case Intrinsic::riscv_vsoxei_mask:
2155 case Intrinsic::riscv_vsuxei:
2156 case Intrinsic::riscv_vsuxei_mask: {
2157 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2158 IntNo == Intrinsic::riscv_vsuxei_mask;
2159 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2160 IntNo == Intrinsic::riscv_vsoxei_mask;
2161
2162 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2163 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2164
2165 unsigned CurOp = 2;
2166 SmallVector<SDValue, 8> Operands;
2167 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2168
2169 MVT IndexVT;
2170 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2171 /*IsStridedOrIndexed*/ true, Operands,
2172 /*IsLoad=*/false, &IndexVT);
2173
2175 "Element count mismatch");
2176
2177 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2178 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2179 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2180 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2181 report_fatal_error("The V extension does not support EEW=64 for index "
2182 "values when XLEN=32");
2183 }
2184 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2185 IsMasked, IsOrdered, IndexLog2EEW,
2186 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2187 MachineSDNode *Store =
2188 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2189
2190 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2191 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2192
2193 ReplaceNode(Node, Store);
2194 return;
2195 }
2196 case Intrinsic::riscv_vsm:
2197 case Intrinsic::riscv_vse:
2198 case Intrinsic::riscv_vse_mask:
2199 case Intrinsic::riscv_vsse:
2200 case Intrinsic::riscv_vsse_mask: {
2201 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2202 IntNo == Intrinsic::riscv_vsse_mask;
2203 bool IsStrided =
2204 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2205
2206 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2207 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2208
2209 unsigned CurOp = 2;
2210 SmallVector<SDValue, 8> Operands;
2211 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2212
2213 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2214 Operands);
2215
2216 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2217 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2218 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2219 MachineSDNode *Store =
2220 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2221 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2222 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2223
2224 ReplaceNode(Node, Store);
2225 return;
2226 }
2227 case Intrinsic::riscv_sf_vc_x_se:
2228 case Intrinsic::riscv_sf_vc_i_se:
2229 selectSF_VC_X_SE(Node);
2230 return;
2231 }
2232 break;
2233 }
2234 case ISD::BITCAST: {
2235 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2236 // Just drop bitcasts between vectors if both are fixed or both are
2237 // scalable.
2238 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2239 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2240 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2241 CurDAG->RemoveDeadNode(Node);
2242 return;
2243 }
2244 break;
2245 }
2246 case ISD::INSERT_SUBVECTOR: {
2247 SDValue V = Node->getOperand(0);
2248 SDValue SubV = Node->getOperand(1);
2249 SDLoc DL(SubV);
2250 auto Idx = Node->getConstantOperandVal(2);
2251 MVT SubVecVT = SubV.getSimpleValueType();
2252
2253 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2254 MVT SubVecContainerVT = SubVecVT;
2255 // Establish the correct scalable-vector types for any fixed-length type.
2256 if (SubVecVT.isFixedLengthVector()) {
2257 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2258 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2259 [[maybe_unused]] bool ExactlyVecRegSized =
2260 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2261 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2262 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2263 .getKnownMinValue()));
2264 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2265 }
2266 MVT ContainerVT = VT;
2267 if (VT.isFixedLengthVector())
2268 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2269
2270 const auto *TRI = Subtarget->getRegisterInfo();
2271 unsigned SubRegIdx;
2272 std::tie(SubRegIdx, Idx) =
2273 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2274 ContainerVT, SubVecContainerVT, Idx, TRI);
2275
2276 // If the Idx hasn't been completely eliminated then this is a subvector
2277 // insert which doesn't naturally align to a vector register. These must
2278 // be handled using instructions to manipulate the vector registers.
2279 if (Idx != 0)
2280 break;
2281
2282 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2283 [[maybe_unused]] bool IsSubVecPartReg =
2284 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2285 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2286 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2287 assert((!IsSubVecPartReg || V.isUndef()) &&
2288 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2289 "the subvector is smaller than a full-sized register");
2290
2291 // If we haven't set a SubRegIdx, then we must be going between
2292 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2293 if (SubRegIdx == RISCV::NoSubRegister) {
2294 unsigned InRegClassID =
2295 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2296 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2297 InRegClassID &&
2298 "Unexpected subvector extraction");
2299 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2300 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2301 DL, VT, SubV, RC);
2302 ReplaceNode(Node, NewNode);
2303 return;
2304 }
2305
2306 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2307 ReplaceNode(Node, Insert.getNode());
2308 return;
2309 }
2310 case ISD::EXTRACT_SUBVECTOR: {
2311 SDValue V = Node->getOperand(0);
2312 auto Idx = Node->getConstantOperandVal(1);
2313 MVT InVT = V.getSimpleValueType();
2314 SDLoc DL(V);
2315
2316 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2317 MVT SubVecContainerVT = VT;
2318 // Establish the correct scalable-vector types for any fixed-length type.
2319 if (VT.isFixedLengthVector()) {
2320 assert(Idx == 0);
2321 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2322 }
2323 if (InVT.isFixedLengthVector())
2324 InVT = TLI.getContainerForFixedLengthVector(InVT);
2325
2326 const auto *TRI = Subtarget->getRegisterInfo();
2327 unsigned SubRegIdx;
2328 std::tie(SubRegIdx, Idx) =
2329 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2330 InVT, SubVecContainerVT, Idx, TRI);
2331
2332 // If the Idx hasn't been completely eliminated then this is a subvector
2333 // extract which doesn't naturally align to a vector register. These must
2334 // be handled using instructions to manipulate the vector registers.
2335 if (Idx != 0)
2336 break;
2337
2338 // If we haven't set a SubRegIdx, then we must be going between
2339 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2340 if (SubRegIdx == RISCV::NoSubRegister) {
2341 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2342 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2343 InRegClassID &&
2344 "Unexpected subvector extraction");
2345 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2346 SDNode *NewNode =
2347 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2348 ReplaceNode(Node, NewNode);
2349 return;
2350 }
2351
2352 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2353 ReplaceNode(Node, Extract.getNode());
2354 return;
2355 }
2356 case RISCVISD::VMV_S_X_VL:
2357 case RISCVISD::VFMV_S_F_VL:
2358 case RISCVISD::VMV_V_X_VL:
2359 case RISCVISD::VFMV_V_F_VL: {
2360 // Try to match splat of a scalar load to a strided load with stride of x0.
2361 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2362 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2363 if (!Node->getOperand(0).isUndef())
2364 break;
2365 SDValue Src = Node->getOperand(1);
2366 auto *Ld = dyn_cast<LoadSDNode>(Src);
2367 // Can't fold an indexed (pre/post-increment) load: its second output
2368 // (the updated address) is also used, so the load node can't be removed.
2369 if (!Ld || Ld->isIndexed())
2370 break;
2371 EVT MemVT = Ld->getMemoryVT();
2372 // The memory VT should be the same size as the element type.
2373 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2374 break;
2375 if (!IsProfitableToFold(Src, Node, Node) ||
2376 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2377 break;
2378
2379 SDValue VL;
2380 if (IsScalarMove) {
2381 // We could handle VL values other than 1 if we updated the VSETVLI
2382 // insertion pass to avoid introducing extra VSETVLIs.
2383 if (!isOneConstant(Node->getOperand(2)))
2384 break;
2385 selectVLOp(Node->getOperand(2), VL);
2386 } else
2387 selectVLOp(Node->getOperand(2), VL);
2388
2389 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2390 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2391
2392 // If VL=1, then we don't need to do a strided load and can just do a
2393 // regular load.
2394 bool IsStrided = !isOneConstant(VL);
2395
2396 // Only do a strided load if we have optimized zero-stride vector load.
2397 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2398 break;
2399
2400 SmallVector<SDValue> Operands = {
2401 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2402 Ld->getBasePtr()};
2403 if (IsStrided)
2404 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2405 uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2406 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2407 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2408
2409 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2410 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2411 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2412 Log2SEW, static_cast<unsigned>(LMUL));
2413 MachineSDNode *Load =
2414 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2415 // Update the chain.
2416 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2417 // Record the mem-refs
2418 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2419 // Replace the splat with the vlse.
2420 ReplaceNode(Node, Load);
2421 return;
2422 }
2423 case ISD::PREFETCH:
2424 unsigned Locality = Node->getConstantOperandVal(3);
2425 if (Locality > 2)
2426 break;
2427
2428 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2429 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2430 MMO->setFlags(MachineMemOperand::MONontemporal);
2431
2432 int NontemporalLevel = 0;
2433 switch (Locality) {
2434 case 0:
2435 NontemporalLevel = 3; // NTL.ALL
2436 break;
2437 case 1:
2438 NontemporalLevel = 1; // NTL.PALL
2439 break;
2440 case 2:
2441 NontemporalLevel = 0; // NTL.P1
2442 break;
2443 default:
2444 llvm_unreachable("unexpected locality value.");
2445 }
2446
2447 if (NontemporalLevel & 0b1)
2448 MMO->setFlags(MachineMemOperand::MONontemporalBit0);
2449 if (NontemporalLevel & 0b10)
2450 MMO->setFlags(MachineMemOperand::MONontemporalBit1);
2451 }
2452 break;
2453 }
2454
2455 // Select the default instruction.
2456 SelectCode(Node);
2457}
2458
2459 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2460 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2461 std::vector<SDValue> &OutOps) {
2462 // Always produce a register and immediate operand, as expected by
2463 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2464 switch (ConstraintID) {
2465 case InlineAsm::ConstraintCode::o:
2466 case InlineAsm::ConstraintCode::m: {
2467 SDValue Op0, Op1;
2468 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2469 assert(Found && "SelectAddrRegImm should always succeed");
2470 OutOps.push_back(Op0);
2471 OutOps.push_back(Op1);
2472 return false;
2473 }
2474 case InlineAsm::ConstraintCode::A:
2475 OutOps.push_back(Op);
2476 OutOps.push_back(
2477 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2478 return false;
2479 default:
2480 report_fatal_error("Unexpected asm memory constraint " +
2481 InlineAsm::getMemConstraintName(ConstraintID));
2482 }
2483
2484 return true;
2485}
2486
2487 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2488 SDValue &Offset) {
2489 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2490 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2491 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2492 return true;
2493 }
2494
2495 return false;
2496}
2497
2498// Select a frame index and an optional immediate offset from an ADD or OR.
2499 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2500 SDValue &Offset) {
2501 if (SelectAddrFrameIndex(Addr, Base, Offset))
2502 return true;
2503
2504 if (!CurDAG->isBaseWithConstantOffset(Addr))
2505 return false;
2506
2507 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2508 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2509 if (isInt<12>(CVal)) {
2510 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2511 Subtarget->getXLenVT());
2512 Offset = CurDAG->getSignedConstant(
2513 CVal, SDLoc(Addr), Subtarget->getXLenVT(), /*isTarget=*/true);
2514 return true;
2515 }
2516 }
2517
2518 return false;
2519}
2520
2521// Fold constant addresses.
2522static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2523 const MVT VT, const RISCVSubtarget *Subtarget,
2524 SDValue Addr, SDValue &Base, SDValue &Offset,
2525 bool IsPrefetch = false) {
2526 if (!isa<ConstantSDNode>(Addr))
2527 return false;
2528
2529 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2530
2531 // If the constant is a simm12, we can fold the whole constant and use X0 as
2532 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2533 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
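// For example, the constant address 0x12345678 becomes LUI 0x12345 as the
// base with 0x678 folded into the load/store offset.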
2534 int64_t Lo12 = SignExtend64<12>(CVal);
2535 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2536 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2537 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2538 return false;
2539
2540 if (Hi) {
2541 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2542 Base = SDValue(
2543 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2544 CurDAG->getTargetConstant(Hi20, DL, VT)),
2545 0);
2546 } else {
2547 Base = CurDAG->getRegister(RISCV::X0, VT);
2548 }
2549 Offset = CurDAG->getSignedConstant(Lo12, DL, VT, /*isTarget=*/true);
2550 return true;
2551 }
2552
2553 // Ask how constant materialization would handle this constant.
2554 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2555
2556 // If the last instruction would be an ADDI, we can fold its immediate and
2557 // emit the rest of the sequence as the base.
2558 if (Seq.back().getOpcode() != RISCV::ADDI)
2559 return false;
2560 Lo12 = Seq.back().getImm();
2561 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2562 return false;
2563
2564 // Drop the last instruction.
2565 Seq.pop_back();
2566 assert(!Seq.empty() && "Expected more instructions in sequence");
2567
2568 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2569 Offset = CurDAG->getSignedConstant(Lo12, DL, VT, /*isTarget=*/true);
2570 return true;
2571}
2572
2573// Is this ADD instruction only used as the base pointer of scalar loads and
2574// stores?
2575 static bool isWorthFoldingAdd(SDValue Add) {
2576 for (auto *Use : Add->uses()) {
2577 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2578 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2579 Use->getOpcode() != ISD::ATOMIC_STORE)
2580 return false;
2581 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2582 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2583 VT != MVT::f64)
2584 return false;
2585 // Don't allow stores of the value. It must be used as the address.
2586 if (Use->getOpcode() == ISD::STORE &&
2587 cast<StoreSDNode>(Use)->getValue() == Add)
2588 return false;
2589 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2590 cast<AtomicSDNode>(Use)->getVal() == Add)
2591 return false;
2592 }
2593
2594 return true;
2595}
2596
2597 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2598 unsigned MaxShiftAmount,
2599 SDValue &Base, SDValue &Index,
2600 SDValue &Scale) {
2601 EVT VT = Addr.getSimpleValueType();
2602 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2603 SDValue &Shift) {
2604 uint64_t ShiftAmt = 0;
2605 Index = N;
2606
2607 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2608 // Only match shifts by a value in range [0, MaxShiftAmount].
2609 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2610 Index = N.getOperand(0);
2611 ShiftAmt = N.getConstantOperandVal(1);
2612 }
2613 }
2614
2615 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2616 return ShiftAmt != 0;
2617 };
2618
2619 if (Addr.getOpcode() == ISD::ADD) {
2620 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2621 SDValue AddrB = Addr.getOperand(0);
2622 if (AddrB.getOpcode() == ISD::ADD &&
2623 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2624 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2625 isInt<12>(C1->getSExtValue())) {
2626 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2627 SDValue C1Val =
2628 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2629 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2630 AddrB.getOperand(1), C1Val),
2631 0);
2632 return true;
2633 }
2634 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2635 Base = Addr.getOperand(1);
2636 return true;
2637 } else {
2638 UnwrapShl(Addr.getOperand(1), Index, Scale);
2639 Base = Addr.getOperand(0);
2640 return true;
2641 }
2642 } else if (UnwrapShl(Addr, Index, Scale)) {
2643 EVT VT = Addr.getValueType();
2644 Base = CurDAG->getRegister(RISCV::X0, VT);
2645 return true;
2646 }
2647
2648 return false;
2649}
2650
2651 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2652 SDValue &Offset, bool IsINX) {
2653 if (SelectAddrFrameIndex(Addr, Base, Offset))
2654 return true;
2655
2656 SDLoc DL(Addr);
2657 MVT VT = Addr.getSimpleValueType();
2658
2659 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2660 Base = Addr.getOperand(0);
2661 Offset = Addr.getOperand(1);
2662 return true;
2663 }
2664
2665 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2666 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2667 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2668 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2669 Base = Addr.getOperand(0);
2670 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2671 SDValue LoOperand = Base.getOperand(1);
2672 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2673 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2674 // (its low part, really), then we can rely on the alignment of that
2675 // variable to provide a margin of safety before low part can overflow
2676 // the 12 bits of the load/store offset. Check if CVal falls within
2677 // that margin; if so (low part + CVal) can't overflow.
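// For example, an 8-byte-aligned global has a low part that is a multiple of
// 8, so adding a CVal below 8 cannot push it out of the simm12 range.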
2678 const DataLayout &DL = CurDAG->getDataLayout();
2679 Align Alignment = commonAlignment(
2680 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2681 if (CVal == 0 || Alignment > CVal) {
2682 int64_t CombinedOffset = CVal + GA->getOffset();
2683 Base = Base.getOperand(0);
2684 Offset = CurDAG->getTargetGlobalAddress(
2685 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2686 CombinedOffset, GA->getTargetFlags());
2687 return true;
2688 }
2689 }
2690 }
2691
2692 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2693 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2694 Offset = CurDAG->getSignedConstant(CVal, DL, VT, /*isTarget=*/true);
2695 return true;
2696 }
2697 }
2698
2699 // Handle ADD with large immediates.
2700 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2701 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2702 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2703 "simm12 not already handled?");
2704
2705 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2706 // an ADDI for part of the offset and fold the rest into the load/store.
2707 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
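// For example, an offset of 3000 is selected as an ADDI of 2047 plus a
// load/store offset of 953; both halves fit in simm12.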
2708 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2709 int64_t Adj = CVal < 0 ? -2048 : 2047;
2710 Base = SDValue(
2711 CurDAG->getMachineNode(
2712 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2713 CurDAG->getSignedConstant(Adj, DL, VT, /*isTarget=*/true)),
2714 0);
2715 Offset = CurDAG->getSignedConstant(CVal - Adj, DL, VT, /*isTarget=*/true);
2716 return true;
2717 }
2718
2719 // For larger immediates, we might be able to save one instruction from
2720 // constant materialization by folding the Lo12 bits of the immediate into
2721 // the address. We should only do this if the ADD is only used by loads and
2722 // stores that can fold the lo12 bits. Otherwise, the ADD will get selected
2723 // separately with the full materialized immediate creating extra
2724 // instructions.
2725 if (isWorthFoldingAdd(Addr) &&
2726 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2727 Offset)) {
2728 // Insert an ADD instruction with the materialized Hi52 bits.
2729 Base = SDValue(
2730 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2731 0);
2732 return true;
2733 }
2734 }
2735
2736 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2737 return true;
2738
2739 Base = Addr;
2740 Offset = CurDAG->getTargetConstant(0, DL, VT);
2741 return true;
2742}
2743
2744/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2745 /// Offset should be all zeros.
2746 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2747 SDValue &Offset) {
2748 if (SelectAddrFrameIndex(Addr, Base, Offset))
2749 return true;
2750
2751 SDLoc DL(Addr);
2752 MVT VT = Addr.getSimpleValueType();
2753
2754 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2755 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2756 if (isInt<12>(CVal)) {
2757 Base = Addr.getOperand(0);
2758
2759 // Early-out if not a valid offset.
2760 if ((CVal & 0b11111) != 0) {
2761 Base = Addr;
2762 Offset = CurDAG->getTargetConstant(0, DL, VT);
2763 return true;
2764 }
2765
2766 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2767 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2768 Offset = CurDAG->getSignedConstant(CVal, DL, VT, /*isTarget=*/true);
2769 return true;
2770 }
2771 }
2772
2773 // Handle ADD with large immediates.
2774 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2775 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2776 assert(!isInt<12>(CVal) &&
2777 "simm12 not already handled?");
2778
2779 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2780 // one instruction by folding adjustment (-2048 or 2016) into the address.
2781 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2782 int64_t Adj = CVal < 0 ? -2048 : 2016;
2783 int64_t AdjustedOffset = CVal - Adj;
2784 Base = SDValue(CurDAG->getMachineNode(
2785 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2786 CurDAG->getSignedConstant(AdjustedOffset, DL, VT,
2787 /*isTarget=*/true)),
2788 0);
2789 Offset = CurDAG->getSignedConstant(Adj, DL, VT, /*isTarget=*/true);
2790 return true;
2791 }
2792
2793 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2794 Offset, true)) {
2795 // Insert an ADD instruction with the materialized Hi52 bits.
2796 Base = SDValue(
2797 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2798 0);
2799 return true;
2800 }
2801 }
2802
2803 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2804 return true;
2805
2806 Base = Addr;
2807 Offset = CurDAG->getTargetConstant(0, DL, VT);
2808 return true;
2809}
2810
2811 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2812 SDValue &Offset) {
2813 if (Addr.getOpcode() != ISD::ADD)
2814 return false;
2815
2816 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2817 return false;
2818
2819 Base = Addr.getOperand(1);
2820 Offset = Addr.getOperand(0);
2821 return true;
2822}
2823
2824 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2825 SDValue &ShAmt) {
2826 ShAmt = N;
2827
2828 // Peek through zext.
2829 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2830 ShAmt = ShAmt.getOperand(0);
2831
2832 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2833 // amount. If there is an AND on the shift amount, we can bypass it if it
2834 // doesn't affect any of those bits.
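// For example, on RV64 (srl X, (and Y, 63)) can use Y directly as the shift
// amount, since SRL already ignores all but the low 6 bits of it.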
2835 if (ShAmt.getOpcode() == ISD::AND &&
2836 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2837 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2838
2839 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2840 // mask that covers the bits needed to represent all shift amounts.
2841 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2842 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2843
2844 if (ShMask.isSubsetOf(AndMask)) {
2845 ShAmt = ShAmt.getOperand(0);
2846 } else {
2847 // SimplifyDemandedBits may have optimized the mask so try restoring any
2848 // bits that are known zero.
2849 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2850 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2851 return true;
2852 ShAmt = ShAmt.getOperand(0);
2853 }
2854 }
2855
2856 if (ShAmt.getOpcode() == ISD::ADD &&
2857 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2858 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2859 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2860 // to avoid the ADD.
2861 if (Imm != 0 && Imm % ShiftWidth == 0) {
2862 ShAmt = ShAmt.getOperand(0);
2863 return true;
2864 }
2865 } else if (ShAmt.getOpcode() == ISD::SUB &&
2866 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2867 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2868 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2869 // generate a NEG instead of a SUB of a constant.
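// For example, (srl X, (sub 64, Y)) on RV64 becomes (srl X, (neg Y)), since
// 64 - Y and -Y agree in the low 6 bits the shift reads.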
2870 if (Imm != 0 && Imm % ShiftWidth == 0) {
2871 SDLoc DL(ShAmt);
2872 EVT VT = ShAmt.getValueType();
2873 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2874 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2875 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2876 ShAmt.getOperand(1));
2877 ShAmt = SDValue(Neg, 0);
2878 return true;
2879 }
2880 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2881 // to generate a NOT instead of a SUB of a constant.
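// For example, (sra X, (sub 63, Y)) on RV64 becomes (sra X, (not Y)), since
// 63 - Y and ~Y agree in the low 6 bits the shift reads.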
2882 if (Imm % ShiftWidth == ShiftWidth - 1) {
2883 SDLoc DL(ShAmt);
2884 EVT VT = ShAmt.getValueType();
2885 MachineSDNode *Not = CurDAG->getMachineNode(
2886 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2887 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
2888 ShAmt = SDValue(Not, 0);
2889 return true;
2890 }
2891 }
2892
2893 return true;
2894}
2895
2896/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2897/// check for equality with 0. This function emits instructions that convert the
2898/// seteq/setne into something that can be compared with 0.
2899/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2900/// ISD::SETNE).
2901 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2902 SDValue &Val) {
2903 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2904 "Unexpected condition code!");
2905
2906 // We're looking for a setcc.
2907 if (N->getOpcode() != ISD::SETCC)
2908 return false;
2909
2910 // Must be an equality comparison.
2911 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2912 if (CCVal != ExpectedCCVal)
2913 return false;
2914
2915 SDValue LHS = N->getOperand(0);
2916 SDValue RHS = N->getOperand(1);
2917
2918 if (!LHS.getValueType().isScalarInteger())
2919 return false;
2920
2921 // If the RHS is 0, we don't need any extra instructions; return the LHS.
2922 if (isNullConstant(RHS)) {
2923 Val = LHS;
2924 return true;
2925 }
2926
2927 SDLoc DL(N);
2928
2929 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2930 int64_t CVal = C->getSExtValue();
2931 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2932 // non-zero otherwise.
2933 if (CVal == -2048) {
2934 Val = SDValue(CurDAG->getMachineNode(
2935 RISCV::XORI, DL, N->getValueType(0), LHS,
2936 CurDAG->getSignedConstant(CVal, DL, N->getValueType(0),
2937 /*isTarget=*/true)),
2938 0);
2939 return true;
2940 }
2941 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2942 // LHS is equal to the RHS and non-zero otherwise.
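// For example, (seteq X, 100) becomes (addi X, -100), which is zero exactly
// when X equals 100.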
2943 if (isInt<12>(CVal) || CVal == 2048) {
2944 Val = SDValue(CurDAG->getMachineNode(
2945 RISCV::ADDI, DL, N->getValueType(0), LHS,
2946 CurDAG->getSignedConstant(-CVal, DL, N->getValueType(0),
2947 /*isTarget=*/true)),
2948 0);
2949 return true;
2950 }
2951 }
2952
2953 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2954 // equal and a non-zero value if they aren't.
2955 Val = SDValue(
2956 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2957 return true;
2958}
2959
2960 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2961 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2962 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2963 Val = N.getOperand(0);
2964 return true;
2965 }
2966
2967 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2968 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2969 return N;
2970
2971 SDValue N0 = N.getOperand(0);
2972 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2973 N.getConstantOperandVal(1) == ShiftAmt &&
2974 N0.getConstantOperandVal(1) == ShiftAmt)
2975 return N0.getOperand(0);
2976
2977 return N;
2978 };
2979
2980 MVT VT = N.getSimpleValueType();
2981 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2982 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2983 return true;
2984 }
2985
2986 return false;
2987}
2988
2989 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2990 if (N.getOpcode() == ISD::AND) {
2991 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2992 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2993 Val = N.getOperand(0);
2994 return true;
2995 }
2996 }
2997 MVT VT = N.getSimpleValueType();
2998 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2999 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3000 Val = N;
3001 return true;
3002 }
3003
3004 return false;
3005}
3006
3007/// Look for various patterns that can be done with a SHL that can be folded
3008/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3009/// SHXADD we are trying to match.
3010 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3011 SDValue &Val) {
3012 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3013 SDValue N0 = N.getOperand(0);
3014
3015 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3016 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3017 isa<ConstantSDNode>(N0.getOperand(1))) {
3018 uint64_t Mask = N.getConstantOperandVal(1);
3019 unsigned C2 = N0.getConstantOperandVal(1);
3020
3021 unsigned XLen = Subtarget->getXLen();
3022 if (LeftShift)
3023 Mask &= maskTrailingZeros<uint64_t>(C2);
3024 else
3025 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3026
3027 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3028 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3029 // followed by a SHXADD with c3 for the X amount.
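// For example, with ShAmt=3, (and (shl y, 1), 0xFFFFFFFFFFFFFFF8) can be
// selected as SRLI y, 2 feeding the SH3ADD.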
3030 if (isShiftedMask_64(Mask)) {
3031 unsigned Leading = XLen - llvm::bit_width(Mask);
3032 unsigned Trailing = llvm::countr_zero(Mask);
3033 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
3034 SDLoc DL(N);
3035 EVT VT = N.getValueType();
3036 Val = SDValue(CurDAG->getMachineNode(
3037 RISCV::SRLI, DL, VT, N0.getOperand(0),
3038 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
3039 0);
3040 return true;
3041 }
3042 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3043 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3044 // followed by a SHXADD using c3 for the X amount.
3045 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
3046 SDLoc DL(N);
3047 EVT VT = N.getValueType();
3048 Val = SDValue(
3049 CurDAG->getMachineNode(
3050 RISCV::SRLI, DL, VT, N0.getOperand(0),
3051 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
3052 0);
3053 return true;
3054 }
3055 }
3056 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3057 isa<ConstantSDNode>(N.getOperand(1))) {
3058 uint64_t Mask = N.getConstantOperandVal(1);
3059 unsigned C2 = N0.getConstantOperandVal(1);
3060
3061 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3062 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3063 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3064 // the X amount.
3065 if (isShiftedMask_64(Mask)) {
3066 unsigned XLen = Subtarget->getXLen();
3067 unsigned Leading = XLen - llvm::bit_width(Mask);
3068 unsigned Trailing = llvm::countr_zero(Mask);
3069 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3070 SDLoc DL(N);
3071 EVT VT = N.getValueType();
3072 Val = SDValue(CurDAG->getMachineNode(
3073 RISCV::SRAI, DL, VT, N0.getOperand(0),
3074 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3075 0);
3076 Val = SDValue(CurDAG->getMachineNode(
3077 RISCV::SRLI, DL, VT, Val,
3078 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3079 0);
3080 return true;
3081 }
3082 }
3083 }
3084 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3085 (LeftShift || N.getOpcode() == ISD::SRL) &&
3086 isa<ConstantSDNode>(N.getOperand(1))) {
3087 SDValue N0 = N.getOperand(0);
3088 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3089 isa<ConstantSDNode>(N0.getOperand(1))) {
3090 uint64_t Mask = N0.getConstantOperandVal(1);
3091 if (isShiftedMask_64(Mask)) {
3092 unsigned C1 = N.getConstantOperandVal(1);
3093 unsigned XLen = Subtarget->getXLen();
3094 unsigned Leading = XLen - llvm::bit_width(Mask);
3095 unsigned Trailing = llvm::countr_zero(Mask);
3096 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3097 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3098 if (LeftShift && Leading == 32 && Trailing > 0 &&
3099 (Trailing + C1) == ShAmt) {
3100 SDLoc DL(N);
3101 EVT VT = N.getValueType();
3102 Val = SDValue(CurDAG->getMachineNode(
3103 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3104 CurDAG->getTargetConstant(Trailing, DL, VT)),
3105 0);
3106 return true;
3107 }
3108 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3109 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3110 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3111 (Trailing - C1) == ShAmt) {
3112 SDLoc DL(N);
3113 EVT VT = N.getValueType();
3114 Val = SDValue(CurDAG->getMachineNode(
3115 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3116 CurDAG->getTargetConstant(Trailing, DL, VT)),
3117 0);
3118 return true;
3119 }
3120 }
3121 }
3122 }
3123
3124 return false;
3125}
3126
3127/// Look for various patterns that can be done with a SHL that can be folded
3128/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3129/// SHXADD_UW we are trying to match.
3130 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3131 SDValue &Val) {
3132 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3133 N.hasOneUse()) {
3134 SDValue N0 = N.getOperand(0);
3135 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3136 N0.hasOneUse()) {
3137 uint64_t Mask = N.getConstantOperandVal(1);
3138 unsigned C2 = N0.getConstantOperandVal(1);
3139
3140 Mask &= maskTrailingZeros<uint64_t>(C2);
3141
3142 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3143 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3144 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3145 if (isShiftedMask_64(Mask)) {
3146 unsigned Leading = llvm::countl_zero(Mask);
3147 unsigned Trailing = llvm::countr_zero(Mask);
3148 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3149 SDLoc DL(N);
3150 EVT VT = N.getValueType();
3151 Val = SDValue(CurDAG->getMachineNode(
3152 RISCV::SLLI, DL, VT, N0.getOperand(0),
3153 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3154 0);
3155 return true;
3156 }
3157 }
3158 }
3159 }
3160
3161 return false;
3162}
3163
3164static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3165 unsigned Bits,
3166 const TargetInstrInfo *TII) {
3167 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3168
3169 if (!MCOpcode)
3170 return false;
3171
3172 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3173 const uint64_t TSFlags = MCID.TSFlags;
3174 if (!RISCVII::hasSEWOp(TSFlags))
3175 return false;
3176 assert(RISCVII::hasVLOp(TSFlags));
3177
3178 bool HasGlueOp = User->getGluedNode() != nullptr;
3179 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3180 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3181 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3182 unsigned VLIdx =
3183 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3184 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3185
3186 if (UserOpNo == VLIdx)
3187 return false;
3188
3189 auto NumDemandedBits =
3190 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3191 return NumDemandedBits && Bits >= *NumDemandedBits;
3192}
3193
3194// Return true if all users of this SDNode* only consume the lower \p Bits.
3195// This can be used to form W instructions for add/sub/mul/shl even when the
3196// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3197// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3198// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3199// the add/sub/mul/shl to become non-W instructions. By checking the users we
3200// may be able to use a W instruction and CSE with the other instruction if
3201// this has happened. We could try to detect that the CSE opportunity exists
3202// before doing this, but that would be more complicated.
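// For example, if an i64 ADD is only consumed by SW and ADDW, every user reads
// just the low 32 bits, so the ADD itself can safely be selected as ADDW.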
3203 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3204 const unsigned Depth) const {
3205 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3206 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3207 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3208 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3209 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3210 isa<ConstantSDNode>(Node) || Depth != 0) &&
3211 "Unexpected opcode");
3212
3213 if (Depth >= SelectionDAG::MaxRecursionDepth)
3214 return false;
3215
3216 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3217 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3218 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3219 return false;
3220
3221 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3222 SDNode *User = *UI;
3223 // Users of this node should have already been instruction selected
3224 if (!User->isMachineOpcode())
3225 return false;
3226
3227 // TODO: Add more opcodes?
3228 switch (User->getMachineOpcode()) {
3229 default:
3230 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3231 break;
3232 return false;
3233 case RISCV::ADDW:
3234 case RISCV::ADDIW:
3235 case RISCV::SUBW:
3236 case RISCV::MULW:
3237 case RISCV::SLLW:
3238 case RISCV::SLLIW:
3239 case RISCV::SRAW:
3240 case RISCV::SRAIW:
3241 case RISCV::SRLW:
3242 case RISCV::SRLIW:
3243 case RISCV::DIVW:
3244 case RISCV::DIVUW:
3245 case RISCV::REMW:
3246 case RISCV::REMUW:
3247 case RISCV::ROLW:
3248 case RISCV::RORW:
3249 case RISCV::RORIW:
3250 case RISCV::CLZW:
3251 case RISCV::CTZW:
3252 case RISCV::CPOPW:
3253 case RISCV::SLLI_UW:
3254 case RISCV::FMV_W_X:
3255 case RISCV::FCVT_H_W:
3256 case RISCV::FCVT_H_W_INX:
3257 case RISCV::FCVT_H_WU:
3258 case RISCV::FCVT_H_WU_INX:
3259 case RISCV::FCVT_S_W:
3260 case RISCV::FCVT_S_W_INX:
3261 case RISCV::FCVT_S_WU:
3262 case RISCV::FCVT_S_WU_INX:
3263 case RISCV::FCVT_D_W:
3264 case RISCV::FCVT_D_W_INX:
3265 case RISCV::FCVT_D_WU:
3266 case RISCV::FCVT_D_WU_INX:
3267 case RISCV::TH_REVW:
3268 case RISCV::TH_SRRIW:
3269 if (Bits >= 32)
3270 break;
3271 return false;
3272 case RISCV::SLL:
3273 case RISCV::SRA:
3274 case RISCV::SRL:
3275 case RISCV::ROL:
3276 case RISCV::ROR:
3277 case RISCV::BSET:
3278 case RISCV::BCLR:
3279 case RISCV::BINV:
3280 // Shift amount operands only use log2(Xlen) bits.
3281 if (UI.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3282 break;
3283 return false;
3284 case RISCV::SLLI:
3285 // SLLI only uses the lower (XLen - ShAmt) bits.
3286 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3287 break;
3288 return false;
3289 case RISCV::ANDI:
3290 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3291 break;
3292 goto RecCheck;
3293 case RISCV::ORI: {
3294 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3295 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3296 break;
3297 [[fallthrough]];
3298 }
3299 case RISCV::AND:
3300 case RISCV::OR:
3301 case RISCV::XOR:
3302 case RISCV::XORI:
3303 case RISCV::ANDN:
3304 case RISCV::ORN:
3305 case RISCV::XNOR:
3306 case RISCV::SH1ADD:
3307 case RISCV::SH2ADD:
3308 case RISCV::SH3ADD:
3309 RecCheck:
3310 if (hasAllNBitUsers(User, Bits, Depth + 1))
3311 break;
3312 return false;
3313 case RISCV::SRLI: {
3314 unsigned ShAmt = User->getConstantOperandVal(1);
3315 // If we are shifting right by less than Bits, and users don't demand any
3316 // bits that were shifted into [Bits-1:0], then we can consider this as an
3317 // N-Bit user.
3318 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3319 break;
3320 return false;
3321 }
3322 case RISCV::SEXT_B:
3323 case RISCV::PACKH:
3324 if (Bits >= 8)
3325 break;
3326 return false;
3327 case RISCV::SEXT_H:
3328 case RISCV::FMV_H_X:
3329 case RISCV::ZEXT_H_RV32:
3330 case RISCV::ZEXT_H_RV64:
3331 case RISCV::PACKW:
3332 if (Bits >= 16)
3333 break;
3334 return false;
3335 case RISCV::PACK:
3336 if (Bits >= (Subtarget->getXLen() / 2))
3337 break;
3338 return false;
3339 case RISCV::ADD_UW:
3340 case RISCV::SH1ADD_UW:
3341 case RISCV::SH2ADD_UW:
3342 case RISCV::SH3ADD_UW:
3343 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3344 // 32 bits.
3345 if (UI.getOperandNo() == 0 && Bits >= 32)
3346 break;
3347 return false;
3348 case RISCV::SB:
3349 if (UI.getOperandNo() == 0 && Bits >= 8)
3350 break;
3351 return false;
3352 case RISCV::SH:
3353 if (UI.getOperandNo() == 0 && Bits >= 16)
3354 break;
3355 return false;
3356 case RISCV::SW:
3357 if (UI.getOperandNo() == 0 && Bits >= 32)
3358 break;
3359 return false;
3360 }
3361 }
3362
3363 return true;
3364}
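// Illustrative example (not part of the original source): for a value %v that
// is only consumed by "sb %v, 0(a0)", the RISCV::SB case above reports that
// just the low 8 bits of %v are demanded, so hasAllNBitUsers(%v, 8) returns
// true and the producer of %v may be narrowed or left unextended.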
3365
3366// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3367bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3368 SDValue &Shl2) {
3369 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3370 int64_t Offset = C->getSExtValue();
3371 int64_t Shift;
3372 for (Shift = 0; Shift < 4; Shift++)
3373 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3374 break;
3375
3376 // Constant cannot be encoded.
3377 if (Shift == 4)
3378 return false;
3379
3380 EVT Ty = N->getValueType(0);
3381 Simm5 = CurDAG->getSignedConstant(Offset >> Shift, SDLoc(N), Ty,
3382 /*isTarget=*/true);
3383 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3384 return true;
3385 }
3386
3387 return false;
3388}
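// Illustrative example (not part of the original source): 96 is selectable as
// Simm5 = 12, Shl2 = 3 (12 << 3 == 96), while 17 is rejected because no shift
// amount in [0, 3] leaves a simm5-representable, evenly divisible value.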
3389
3390// Select VL as a 5 bit immediate or a value that will become a register. This
3391// allows us to choose between VSETIVLI or VSETVLI later.
3392bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3393 auto *C = dyn_cast<ConstantSDNode>(N);
3394 if (C && isUInt<5>(C->getZExtValue())) {
3395 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3396 N->getValueType(0));
3397 } else if (C && C->isAllOnes()) {
3398 // Treat all ones as VLMax.
3399 VL = CurDAG->getSignedConstant(RISCV::VLMaxSentinel, SDLoc(N),
3400 N->getValueType(0), /*isTarget=*/true);
3401 } else if (isa<RegisterSDNode>(N) &&
3402 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3403 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3404 // as the register class. Convert X0 to a special immediate to pass the
3405 // MachineVerifier. This is recognized specially by the vsetvli insertion
3406 // pass.
3407 VL = CurDAG->getSignedConstant(RISCV::VLMaxSentinel, SDLoc(N),
3408 N->getValueType(0), /*isTarget=*/true);
3409 } else {
3410 VL = N;
3411 }
3412
3413 return true;
3414}
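// Illustrative example (not part of the original source): an AVL of 12 stays a
// 5-bit immediate (later selectable as vsetivli), an all-ones AVL is rewritten
// to the VLMax sentinel, the register X0 becomes a special immediate for the
// vsetvli-insertion pass, and any other value is passed through as a register
// operand for vsetvli.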
3415
3416static SDValue findVSplat(SDValue N) {
3417 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3418 if (!N.getOperand(0).isUndef())
3419 return SDValue();
3420 N = N.getOperand(1);
3421 }
3422 SDValue Splat = N;
3423 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3424 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3425 !Splat.getOperand(0).isUndef())
3426 return SDValue();
3427 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3428 return Splat;
3429}
3430
3431bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3432 SDValue Splat = findVSplat(N);
3433 if (!Splat)
3434 return false;
3435
3436 SplatVal = Splat.getOperand(1);
3437 return true;
3438}
3439
3440static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3441 SelectionDAG &DAG,
3442 const RISCVSubtarget &Subtarget,
3443 std::function<bool(int64_t)> ValidateImm) {
3444 SDValue Splat = findVSplat(N);
3445 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3446 return false;
3447
3448 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3449 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3450 "Unexpected splat operand type");
3451
3452 // The semantics of RISCVISD::VMV_V_X_VL are that when the operand
3453 // type is wider than the resulting vector element type, an implicit
3454 // truncation first takes place. Therefore, perform a manual
3455 // truncation/sign-extension in order to ignore any truncated bits and catch
3456 // any zero-extended immediate.
3457 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3458 // sign-extending to (XLenVT -1).
3459 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3460
3461 int64_t SplatImm = SplatConst.getSExtValue();
3462
3463 if (!ValidateImm(SplatImm))
3464 return false;
3465
3466 SplatVal = DAG.getSignedConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT(),
3467 /*isTarget=*/true);
3468 return true;
3469}
3470
3471bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3472 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3473 [](int64_t Imm) { return isInt<5>(Imm); });
3474}
3475
3476bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3477 return selectVSplatImmHelper(
3478 N, SplatVal, *CurDAG, *Subtarget,
3479 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3480}
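// Illustrative example (not part of the original source): the "simm5 plus 1"
// predicate accepts [-15, 16], i.e. exactly those immediates whose value minus
// one still fits in a simm5; -16 is excluded and 16 is allowed for that reason.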
3481
3482bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3483 SDValue &SplatVal) {
3484 return selectVSplatImmHelper(
3485 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3486 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3487 });
3488}
3489
3490bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3491 SDValue &SplatVal) {
3492 return selectVSplatImmHelper(
3493 N, SplatVal, *CurDAG, *Subtarget,
3494 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3495}
3496
3497bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3498 auto IsExtOrTrunc = [](SDValue N) {
3499 switch (N->getOpcode()) {
3500 case ISD::SIGN_EXTEND:
3501 case ISD::ZERO_EXTEND:
3502 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3503 // inactive elements will be undef.
3504 case RISCVISD::TRUNCATE_VECTOR_VL:
3505 case RISCVISD::VSEXT_VL:
3506 case RISCVISD::VZEXT_VL:
3507 return true;
3508 default:
3509 return false;
3510 }
3511 };
3512
3513 // We can have multiple nested nodes, so unravel them all if needed.
3514 while (IsExtOrTrunc(N)) {
3515 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3516 return false;
3517 N = N->getOperand(0);
3518 }
3519
3520 return selectVSplat(N, SplatVal);
3521}
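// Illustrative example (not part of the original source): for a splat that has
// been widened, e.g. (vzext_vl (vmv.v.x x10)), the loop above looks through the
// single-use extend and hands x10 back to the caller, which only needs the low
// 8 bits of the splatted scalar.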
3522
3523bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3524 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3525 if (!CFP)
3526 return false;
3527 const APFloat &APF = CFP->getValueAPF();
3528 // td can handle +0.0 already.
3529 if (APF.isPosZero())
3530 return false;
3531
3532 MVT VT = CFP->getSimpleValueType(0);
3533
3534 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3535 // the returned pair is true) we still prefer FLI + FNEG over immediate
3536 // materialization as the latter might generate a longer instruction sequence.
3537 if (static_cast<const RISCVTargetLowering *>(TLI)
3538 ->getLegalZfaFPImm(APF, VT)
3539 .first >= 0)
3540 return false;
3541
3542 MVT XLenVT = Subtarget->getXLenVT();
3543 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3544 assert(APF.isNegZero() && "Unexpected constant.");
3545 return false;
3546 }
3547 SDLoc DL(N);
3548 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3549 *Subtarget);
3550 return true;
3551}
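// Illustrative example (not part of the original source): for an FP constant
// that no Zfa fli (with or without an extra fneg) can produce, e.g. -2.5 on a
// target without Zfa, the bit pattern is materialized into an integer register
// via selectImm and later moved into the FP register file.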
3552
3553bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3554 SDValue &Imm) {
3555 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3556 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3557
3558 if (!isInt<5>(ImmVal))
3559 return false;
3560
3561 Imm = CurDAG->getSignedConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT(),
3562 /*isTarget=*/true);
3563 return true;
3564 }
3565
3566 return false;
3567}
3568
3569// Try to remove sext.w if the input is a W instruction or can be made into
3570// a W instruction cheaply.
3571bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3572 // Look for the sext.w pattern, addiw rd, rs1, 0.
3573 if (N->getMachineOpcode() != RISCV::ADDIW ||
3574 !isNullConstant(N->getOperand(1)))
3575 return false;
3576
3577 SDValue N0 = N->getOperand(0);
3578 if (!N0.isMachineOpcode())
3579 return false;
3580
3581 switch (N0.getMachineOpcode()) {
3582 default:
3583 break;
3584 case RISCV::ADD:
3585 case RISCV::ADDI:
3586 case RISCV::SUB:
3587 case RISCV::MUL:
3588 case RISCV::SLLI: {
3589 // Convert sext.w+add/sub/mul to their W instructions. This will create
3590 // a new independent instruction. This improves latency.
3591 unsigned Opc;
3592 switch (N0.getMachineOpcode()) {
3593 default:
3594 llvm_unreachable("Unexpected opcode!");
3595 case RISCV::ADD: Opc = RISCV::ADDW; break;
3596 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3597 case RISCV::SUB: Opc = RISCV::SUBW; break;
3598 case RISCV::MUL: Opc = RISCV::MULW; break;
3599 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3600 }
3601
3602 SDValue N00 = N0.getOperand(0);
3603 SDValue N01 = N0.getOperand(1);
3604
3605 // Shift amount needs to be uimm5.
3606 if (N0.getMachineOpcode() == RISCV::SLLI &&
3607 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3608 break;
3609
3610 SDNode *Result =
3611 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3612 N00, N01);
3613 ReplaceUses(N, Result);
3614 return true;
3615 }
3616 case RISCV::ADDW:
3617 case RISCV::ADDIW:
3618 case RISCV::SUBW:
3619 case RISCV::MULW:
3620 case RISCV::SLLIW:
3621 case RISCV::PACKW:
3622 case RISCV::TH_MULAW:
3623 case RISCV::TH_MULAH:
3624 case RISCV::TH_MULSW:
3625 case RISCV::TH_MULSH:
3626 if (N0.getValueType() == MVT::i32)
3627 break;
3628
3629 // Result is already sign extended; just remove the sext.w.
3630 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3631 ReplaceUses(N, N0.getNode());
3632 return true;
3633 }
3634
3635 return false;
3636}
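// Illustrative example (not part of the original source):
//   add a0, a0, a1        becomes        addw a0, a0, a1
//   sext.w a0, a0
// since ADDW already sign-extends its 32-bit result, making the sext.w
// (ADDIW rd, rs, 0) redundant.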
3637
3638// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3639// that's glued to the pseudo. This tries to look up the value that was copied
3640// to V0.
3641static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3642 // Check that we're using V0 as a mask register.
3643 if (!isa<RegisterSDNode>(MaskOp) ||
3644 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3645 return SDValue();
3646
3647 // The glued user defines V0.
3648 const auto *Glued = GlueOp.getNode();
3649
3650 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3651 return SDValue();
3652
3653 // Check that we're defining V0 as a mask register.
3654 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3655 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3656 return SDValue();
3657
3658 SDValue MaskSetter = Glued->getOperand(2);
3659
3660 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3661 // from an extract_subvector or insert_subvector.
3662 if (MaskSetter->isMachineOpcode() &&
3663 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3664 MaskSetter = MaskSetter->getOperand(0);
3665
3666 return MaskSetter;
3667}
3668
3669static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3670 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3671 SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3672 if (!MaskSetter)
3673 return false;
3674
3675 const auto IsVMSet = [](unsigned Opc) {
3676 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3677 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3678 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3679 Opc == RISCV::PseudoVMSET_M_B8;
3680 };
3681
3682 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3683 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3684 // assume that it's all-ones? Same applies to its VL.
3685 return MaskSetter->isMachineOpcode() &&
3686 IsVMSet(MaskSetter.getMachineOpcode());
3687}
3688
3689// Return true if we can make sure mask of N is all-ones mask.
3690static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3691 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3692 N->getOperand(N->getNumOperands() - 1));
3693}
3694
3695static bool isImplicitDef(SDValue V) {
3696 if (!V.isMachineOpcode())
3697 return false;
3698 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3699 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3700 if (!isImplicitDef(V.getOperand(I)))
3701 return false;
3702 return true;
3703 }
3704 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3705}
3706
3707// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3708// corresponding "unmasked" pseudo versions. The mask we're interested in will
3709// take the form of a V0 physical register operand, with a glued
3710// register-setting instruction.
3711bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3712 const RISCV::RISCVMaskedPseudoInfo *I =
3713 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3714 if (!I)
3715 return false;
3716
3717 unsigned MaskOpIdx = I->MaskOpIdx;
3718 if (!usesAllOnesMask(N, MaskOpIdx))
3719 return false;
3720
3721 // There are two classes of pseudos in the table - compares and
3722 // everything else. See the comment on RISCVMaskedPseudo for details.
3723 const unsigned Opc = I->UnmaskedPseudo;
3724 const MCInstrDesc &MCID = TII->get(Opc);
3725 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3726#ifndef NDEBUG
3727 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3728 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3729 RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3730 "Masked and unmasked pseudos are inconsistent");
3731 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3732 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3733#endif
3734
3735 SmallVector<SDValue, 8> Ops;
3736 // Skip the passthru operand at index 0 if !UseTUPseudo.
3737 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3738 // Skip the mask, and the Glue.
3739 SDValue Op = N->getOperand(I);
3740 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3741 continue;
3742 Ops.push_back(Op);
3743 }
3744
3745 // Transitively apply any node glued to our new node.
3746 const auto *Glued = N->getGluedNode();
3747 if (auto *TGlued = Glued->getGluedNode())
3748 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3749
3750 MachineSDNode *Result =
3751 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3752
3753 if (!N->memoperands_empty())
3754 CurDAG->setNodeMemRefs(Result, N->memoperands());
3755
3756 Result->setFlags(N->getFlags());
3757 ReplaceUses(N, Result);
3758
3759 return true;
3760}
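// Illustrative example (not part of the original source): if the V0 operand of
// a PseudoVADD_VV_M1_MASK node is defined by a PseudoVMSET (an all-ones mask),
// the node is rebuilt as the unmasked PseudoVADD_VV_M1 with the V0 operand and
// its glued CopyToReg dropped.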
3761
3762static bool IsVMerge(SDNode *N) {
3763 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3764}
3765
3766// Try to fold away VMERGE_VVM instructions into their true operands:
3767//
3768// %true = PseudoVADD_VV ...
3769// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3770// ->
3771// %x = PseudoVADD_VV_MASK %false, ..., %mask
3772//
3773// We can only fold if vmerge's passthru operand, vmerge's false operand and
3774// %true's passthru operand (if it has one) are the same. This is because we
3775// have to consolidate them into one passthru operand in the result.
3776//
3777// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3778// mask is all ones.
3779//
3780// The resulting VL is the minimum of the two VLs.
3781//
3782// The resulting policy is the effective policy the vmerge would have had,
3783// i.e. whether or not its passthru operand was implicit-def.
3784bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3785 SDValue Passthru, False, True, VL, Mask, Glue;
3786 assert(IsVMerge(N));
3787 Passthru = N->getOperand(0);
3788 False = N->getOperand(1);
3789 True = N->getOperand(2);
3790 Mask = N->getOperand(3);
3791 VL = N->getOperand(4);
3792 // We always have a glue node for the mask at v0.
3793 Glue = N->getOperand(N->getNumOperands() - 1);
3794 assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3795 assert(Glue.getValueType() == MVT::Glue);
3796
3797 // If the EEW of True is different from vmerge's SEW, then we can't fold.
3798 if (True.getSimpleValueType() != N->getSimpleValueType(0))
3799 return false;
3800
3801 // We require that either passthru and false are the same, or that passthru
3802 // is undefined.
3803 if (Passthru != False && !isImplicitDef(Passthru))
3804 return false;
3805
3806 assert(True.getResNo() == 0 &&
3807 "Expect True is the first output of an instruction.");
3808
3809 // N must be the only user of True.
3810 if (!True.hasOneUse())
3811 return false;
3812
3813 if (!True.isMachineOpcode())
3814 return false;
3815
3816 unsigned TrueOpc = True.getMachineOpcode();
3817 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3818 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3819 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3820
3821 bool IsMasked = false;
3822 const RISCV::RISCVMaskedPseudoInfo *Info =
3823 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3824 if (!Info && HasTiedDest) {
3825 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3826 IsMasked = true;
3827 }
3828 assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");
3829
3830 if (!Info)
3831 return false;
3832
3833 // If True has a passthru operand then it needs to be the same as vmerge's
3834 // False, since False will be used for the result's passthru operand.
3835 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3836 SDValue PassthruOpTrue = True->getOperand(0);
3837 if (False != PassthruOpTrue)
3838 return false;
3839 }
3840
3841 // If True is masked then the vmerge must have either the same mask or an all
3842 // 1s mask, since we're going to keep the mask from True.
3843 if (IsMasked) {
3844 // FIXME: Support mask agnostic True instruction which would have an
3845 // undef passthru operand.
3846 SDValue TrueMask =
3847 getMaskSetter(True->getOperand(Info->MaskOpIdx),
3848 True->getOperand(True->getNumOperands() - 1));
3849 assert(TrueMask);
3850 if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
3851 return false;
3852 }
3853
3854 // Skip if True has side effect.
3855 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3856 return false;
3857
3858 // The last operand of a masked instruction may be glued.
3859 bool HasGlueOp = True->getGluedNode() != nullptr;
3860
3861 // The chain operand may exist either before the glued operands or in the last
3862 // position.
3863 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3864 bool HasChainOp =
3865 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3866
3867 if (HasChainOp) {
3868 // Avoid creating cycles in the DAG. We must ensure that none of the other
3869 // operands depend on True through its Chain.
3870 SmallVector<const SDNode *, 4> LoopWorklist;
3871 SmallPtrSet<const SDNode *, 16> Visited;
3872 LoopWorklist.push_back(False.getNode());
3873 LoopWorklist.push_back(Mask.getNode());
3874 LoopWorklist.push_back(VL.getNode());
3875 LoopWorklist.push_back(Glue.getNode());
3876 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3877 return false;
3878 }
3879
3880 // The vector policy operand may be present for masked intrinsics
3881 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3882 unsigned TrueVLIndex =
3883 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3884 SDValue TrueVL = True.getOperand(TrueVLIndex);
3885 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3886
3887 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3888 if (LHS == RHS)
3889 return LHS;
3890 if (isAllOnesConstant(LHS))
3891 return RHS;
3892 if (isAllOnesConstant(RHS))
3893 return LHS;
3894 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3895 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3896 if (!CLHS || !CRHS)
3897 return SDValue();
3898 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3899 };
3900
3901 // Because N and True must have the same passthru operand (or True's operand
3902 // is implicit_def), the "effective" body is the minimum of their VLs.
3903 SDValue OrigVL = VL;
3904 VL = GetMinVL(TrueVL, VL);
3905 if (!VL)
3906 return false;
3907
3908 // Some operations produce different elementwise results depending on the
3909 // active elements, like viota.m or vredsum. This transformation is illegal
3910 // for these if we change the active elements (i.e. mask or VL).
3911 const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
3912 if (RISCVII::activeElementsAffectResult(TrueBaseMCID.TSFlags)) {
3913 if (Mask && !usesAllOnesMask(Mask, Glue))
3914 return false;
3915 if (TrueVL != VL)
3916 return false;
3917 }
3918
3919 // If we end up changing the VL or mask of True, then we need to make sure it
3920 // doesn't raise any observable fp exceptions, since changing the active
3921 // elements will affect how fflags is set.
3922 if (TrueVL != VL || !IsMasked)
3923 if (mayRaiseFPException(True.getNode()) &&
3924 !True->getFlags().hasNoFPExcept())
3925 return false;
3926
3927 SDLoc DL(N);
3928
3929 // From the preconditions we checked above, we know the mask and thus glue
3930 // for the result node will be taken from True.
3931 if (IsMasked) {
3932 Mask = True->getOperand(Info->MaskOpIdx);
3933 Glue = True->getOperand(True->getNumOperands() - 1);
3934 assert(Glue.getValueType() == MVT::Glue);
3935 }
3936
3937 unsigned MaskedOpc = Info->MaskedPseudo;
3938#ifndef NDEBUG
3939 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3940 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
3941 "Expected instructions with mask have policy operand.");
3942 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3943 MCOI::TIED_TO) == 0 &&
3944 "Expected instructions with mask have a tied dest.");
3945#endif
3946
3947 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
3948 // operand is undefined.
3949 //
3950 // However, if the VL became smaller than what the vmerge had originally, then
3951 // elements past VL that were previously in the vmerge's body will have moved
3952 // to the tail. In that case we always need to use tail undisturbed to
3953 // preserve them.
3954 bool MergeVLShrunk = VL != OrigVL;
3955 uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
3956 ? RISCVII::TAIL_AGNOSTIC
3957 : /*TUMU*/ 0;
3958 SDValue PolicyOp =
3959 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3960
3961
3962 SmallVector<SDValue, 8> Ops;
3963 Ops.push_back(False);
3964
3965 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3966 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3967 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3968 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3969
3970 Ops.push_back(Mask);
3971
3972 // For an unmasked "VOp" with a rounding mode operand, the interface looks like
3973 // (..., rm, vl) or (..., rm, vl, policy).
3974 // Its masked version is (..., vm, rm, vl, policy).
3975 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3976 if (HasRoundingMode)
3977 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3978
3979 Ops.append({VL, SEW, PolicyOp});
3980
3981 // Result node should have chain operand of True.
3982 if (HasChainOp)
3983 Ops.push_back(True.getOperand(TrueChainOpIdx));
3984
3985 // Add the glue for the CopyToReg of mask->v0.
3986 Ops.push_back(Glue);
3987
3988 MachineSDNode *Result =
3989 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3990 Result->setFlags(True->getFlags());
3991
3992 if (!cast<MachineSDNode>(True)->memoperands_empty())
3993 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3994
3995 // Replace vmerge.vvm node by Result.
3996 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3997
3998 // Replace another value of True. E.g. chain and VL.
3999 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
4000 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
4001
4002 return true;
4003}
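// Illustrative example (not part of the original source): if %true above was
// computed with VL=4 while the vmerge ran with VL=8, the folded masked node
// uses VL=4; because the vmerge's VL shrank, elements 4..7 of its old body move
// into the tail, so the tail-undisturbed (TUMU) policy is kept to preserve
// them.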
4004
4005bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
4006 bool MadeChange = false;
4007 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4008
4009 while (Position != CurDAG->allnodes_begin()) {
4010 SDNode *N = &*--Position;
4011 if (N->use_empty() || !N->isMachineOpcode())
4012 continue;
4013
4014 if (IsVMerge(N))
4015 MadeChange |= performCombineVMergeAndVOps(N);
4016 }
4017 return MadeChange;
4018}
4019
4020/// If our passthru is an implicit_def, use noreg instead. This
4021/// sidesteps issues with MachineCSE not being able to CSE expressions with
4022/// IMPLICIT_DEF operands while preserving the semantic intent. See
4023/// pr64282 for context. Note that this transform is the last one
4024/// performed at ISEL DAG to DAG.
4025bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4026 bool MadeChange = false;
4027 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4028
4029 while (Position != CurDAG->allnodes_begin()) {
4030 SDNode *N = &*--Position;
4031 if (N->use_empty() || !N->isMachineOpcode())
4032 continue;
4033
4034 const unsigned Opc = N->getMachineOpcode();
4035 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4036 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4037 !isImplicitDef(N->getOperand(0)))
4038 continue;
4039
4040 SmallVector<SDValue, 8> Ops;
4041 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4042 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4043 SDValue Op = N->getOperand(I);
4044 Ops.push_back(Op);
4045 }
4046
4047 MachineSDNode *Result =
4048 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4049 Result->setFlags(N->getFlags());
4050 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4051 ReplaceUses(N, Result);
4052 MadeChange = true;
4053 }
4054 return MadeChange;
4055}
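// Illustrative example (not part of the original source): a vector pseudo whose
// passthru operand is an IMPLICIT_DEF, e.g.
//   %pt = IMPLICIT_DEF
//   %v  = PseudoVADD_VV_M1 %pt, %a, %b, %avl, sew, policy
// is rebuilt with $noreg as the passthru, letting MachineCSE treat two such
// nodes as identical.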
4056
4057
4058// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4059// for instruction scheduling.
4060FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4061 CodeGenOptLevel OptLevel) {
4062 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4063}
4064
4065char RISCVDAGToDAGISelLegacy::ID = 0;
4066
4067RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4068 CodeGenOptLevel OptLevel)
4069 : SelectionDAGISelLegacy(
4070 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4071