1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
22#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#include "RISCVGenSearchableTables.inc"
47} // namespace llvm::RISCV
48
49void RISCVDAGToDAGISel::PreprocessISelDAG() {
50 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
51
52 bool MadeChange = false;
53 while (Position != CurDAG->allnodes_begin()) {
54 SDNode *N = &*--Position;
55 if (N->use_empty())
56 continue;
57
58 SDValue Result;
59 switch (N->getOpcode()) {
60 case ISD::SPLAT_VECTOR: {
61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63 MVT VT = N->getSimpleValueType(0);
64 unsigned Opc =
65 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66 SDLoc DL(N);
67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 SDValue Src = N->getOperand(0);
69 if (VT.isInteger())
70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71 N->getOperand(0));
72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73 break;
74 }
75 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77 // load. Done after lowering and combining so that we have a chance to
78 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
79 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 MVT VT = N->getSimpleValueType(0);
81 SDValue Passthru = N->getOperand(0);
82 SDValue Lo = N->getOperand(1);
83 SDValue Hi = N->getOperand(2);
84 SDValue VL = N->getOperand(3);
85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87 "Unexpected VTs!");
88 MachineFunction &MF = CurDAG->getMachineFunction();
89 SDLoc DL(N);
90
91 // Create a temporary stack slot for each node being expanded.
92 SDValue StackSlot =
93 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
96
97 SDValue Chain = CurDAG->getEntryNode();
98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100 SDValue OffsetSlot =
101 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103 Align(8));
104
105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108 SDValue IntID =
109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110 SDValue Ops[] = {Chain,
111 IntID,
112 Passthru,
113 StackSlot,
114 CurDAG->getRegister(RISCV::X0, MVT::i64),
115 VL};
116
117 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118 MVT::i64, MPI, Align(8),
119 MachineMemOperand::MOLoad);
120 break;
121 }
122 }
123
124 if (Result) {
125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126 LLVM_DEBUG(N->dump(CurDAG));
127 LLVM_DEBUG(dbgs() << "\nNew: ");
128 LLVM_DEBUG(Result->dump(CurDAG));
129 LLVM_DEBUG(dbgs() << "\n");
130
131 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132 MadeChange = true;
133 }
134 }
135
136 if (MadeChange)
137 CurDAG->RemoveDeadNodes();
138}
139
140void RISCVDAGToDAGISel::PostprocessISelDAG() {
141 HandleSDNode Dummy(CurDAG->getRoot());
142 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
143
144 bool MadeChange = false;
145 while (Position != CurDAG->allnodes_begin()) {
146 SDNode *N = &*--Position;
147 // Skip dead nodes and any non-machine opcodes.
148 if (N->use_empty() || !N->isMachineOpcode())
149 continue;
150
151 MadeChange |= doPeepholeSExtW(N);
152
153 // FIXME: This is here only because the VMerge transform doesn't
154 // know how to handle masked true inputs. Once that has been moved
155 // to post-ISEL, this can be deleted as well.
156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157 }
158
159 CurDAG->setRoot(Dummy.getValue());
160
161 MadeChange |= doPeepholeMergeVVMFold();
162
163 // After we're done with everything else, convert IMPLICIT_DEF
164 // passthru operands to NoRegister. This is required to work around
165 // an optimization deficiency in MachineCSE. This really should
166 // be merged back into each of the patterns (i.e. there's no good
167 // reason not to go directly to NoReg), but is being done this way
168 // to allow easy backporting.
169 MadeChange |= doPeepholeNoRegPassThru();
170
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
173}
174
175static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (const RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202}
203
204static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
207
208 // Use a rematerializable pseudo instruction for short sequences if enabled.
209 if (Seq.size() == 2 && UsePseudoMovImm)
210 return SDValue(
211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
213 0);
214
215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216 // worst an LUI+ADDIW. This will require an extra register, but avoids a
217 // constant pool.
218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219 // low and high 32 bits are the same and bits 31 and 63 are set.
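// Illustrative example: for 0x0005000100050001, X = 0x50001 is a single
// LUI+ADDI pair, and (ADD (SLLI X, 32), X) rebuilds the full constant
// because 0x50001 << 32 == 0x0005000100000000.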
220 if (Seq.size() > 3) {
221 unsigned ShiftAmt, AddOpc;
222 RISCVMatInt::InstSeq SeqLo =
223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226
227 SDValue SLLI = SDValue(
228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230 0);
231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232 }
233 }
234
235 // Otherwise, use the original sequence.
236 return selectImmSeq(CurDAG, DL, VT, Seq);
237}
238
239void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
240 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
241 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
242 bool IsLoad, MVT *IndexVT) {
243 SDValue Chain = Node->getOperand(0);
244 SDValue Glue;
245
246 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
247
248 if (IsStridedOrIndexed) {
249 Operands.push_back(Node->getOperand(CurOp++)); // Index.
250 if (IndexVT)
251 *IndexVT = Operands.back()->getSimpleValueType(0);
252 }
253
254 if (IsMasked) {
255 // Mask needs to be copied to V0.
256 SDValue Mask = Node->getOperand(CurOp++);
257 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
258 Glue = Chain.getValue(1);
259 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
260 }
261 SDValue VL;
262 selectVLOp(Node->getOperand(CurOp++), VL);
263 Operands.push_back(VL);
264
265 MVT XLenVT = Subtarget->getXLenVT();
266 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
267 Operands.push_back(SEWOp);
268
269 // At the IR layer, all the masked load intrinsics have policy operands,
270 // none of the others do. All have passthru operands. For our pseudos,
271 // all loads have policy operands.
272 if (IsLoad) {
273 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
274 if (IsMasked)
275 Policy = Node->getConstantOperandVal(CurOp++);
276 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
277 Operands.push_back(PolicyOp);
278 }
279
280 Operands.push_back(Chain); // Chain.
281 if (Glue)
282 Operands.push_back(Glue);
283}
284
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
290 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
291
292 unsigned CurOp = 2;
293 SmallVector<SDValue, 8> Operands;
294
295 Operands.push_back(Node->getOperand(CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302 static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
305
306 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
307 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
308
309 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
310 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
311 CurDAG->RemoveDeadNode(Node);
312}
313
314void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
315 bool IsMasked) {
316 SDLoc DL(Node);
317 MVT VT = Node->getSimpleValueType(0);
318 MVT XLenVT = Subtarget->getXLenVT();
319 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
320 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
321
322 unsigned CurOp = 2;
323 SmallVector<SDValue, 8> Operands;
324
325 Operands.push_back(Node->getOperand(CurOp++));
326
327 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
328 /*IsStridedOrIndexed*/ false, Operands,
329 /*IsLoad=*/true);
330
331 const RISCV::VLSEGPseudo *P =
332 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
333 Log2SEW, static_cast<unsigned>(LMUL));
334 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
335 XLenVT, MVT::Other, Operands);
336
337 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
338 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
339
340 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
341 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
342 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
343 CurDAG->RemoveDeadNode(Node);
344}
345
346void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
347 bool IsOrdered) {
348 SDLoc DL(Node);
349 MVT VT = Node->getSimpleValueType(0);
350 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
351 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
352
353 unsigned CurOp = 2;
354 SmallVector<SDValue, 8> Operands;
355
356 Operands.push_back(Node->getOperand(CurOp++));
357
358 MVT IndexVT;
359 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
360 /*IsStridedOrIndexed*/ true, Operands,
361 /*IsLoad=*/true, &IndexVT);
362
363#ifndef NDEBUG
364 // Number of elements = RVVBitsPerBlock * LMUL / SEW
365 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
366 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
367 if (DecodedLMUL.second)
368 ContainedTyNumElts /= DecodedLMUL.first;
369 else
370 ContainedTyNumElts *= DecodedLMUL.first;
371 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
372 "Element count mismatch");
373#endif
374
375 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
376 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
377 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
378 report_fatal_error("The V extension does not support EEW=64 for index "
379 "values when XLEN=32");
380 }
381 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
382 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
383 static_cast<unsigned>(IndexLMUL));
384 MachineSDNode *Load =
385 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
386
387 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
388 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
389
390 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
391 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
392 CurDAG->RemoveDeadNode(Node);
393}
394
395void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
396 bool IsStrided) {
397 SDLoc DL(Node);
398 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
399 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
400 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
401
402 unsigned CurOp = 2;
403 SmallVector<SDValue, 8> Operands;
404
405 Operands.push_back(Node->getOperand(CurOp++));
406
407 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
408 Operands);
409
410 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
411 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
412 MachineSDNode *Store =
413 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
414
415 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
416 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
417
418 ReplaceNode(Node, Store);
419}
420
421void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
422 bool IsOrdered) {
423 SDLoc DL(Node);
424 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
425 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
426 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
427
428 unsigned CurOp = 2;
429 SmallVector<SDValue, 8> Operands;
430
431 Operands.push_back(Node->getOperand(CurOp++));
432
433 MVT IndexVT;
434 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
435 /*IsStridedOrIndexed*/ true, Operands,
436 /*IsLoad=*/false, &IndexVT);
437
438#ifndef NDEBUG
439 // Number of elements = RVVBitsPerBlock * LMUL / SEW
440 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
441 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
442 if (DecodedLMUL.second)
443 ContainedTyNumElts /= DecodedLMUL.first;
444 else
445 ContainedTyNumElts *= DecodedLMUL.first;
446 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
447 "Element count mismatch");
448#endif
449
450 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
451 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
452 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
453 report_fatal_error("The V extension does not support EEW=64 for index "
454 "values when XLEN=32");
455 }
456 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
457 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
458 static_cast<unsigned>(IndexLMUL));
459 MachineSDNode *Store =
460 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
461
462 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
463 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
464
465 ReplaceNode(Node, Store);
466}
467
468void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
469 if (!Subtarget->hasVInstructions())
470 return;
471
472 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
473
474 SDLoc DL(Node);
475 MVT XLenVT = Subtarget->getXLenVT();
476
477 unsigned IntNo = Node->getConstantOperandVal(0);
478
479 assert((IntNo == Intrinsic::riscv_vsetvli ||
480 IntNo == Intrinsic::riscv_vsetvlimax) &&
481 "Unexpected vsetvli intrinsic");
482
483 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
484 unsigned Offset = (VLMax ? 1 : 2);
485
486 assert(Node->getNumOperands() == Offset + 2 &&
487 "Unexpected number of operands");
488
489 unsigned SEW =
490 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
491 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
492 Node->getConstantOperandVal(Offset + 1) & 0x7);
493
494 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
495 /*MaskAgnostic*/ true);
496 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
497
498 SDValue VLOperand;
499 unsigned Opcode = RISCV::PseudoVSETVLI;
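// Illustrative example: with a known VLEN of 128, an AVL of 4 at SEW=32 and
// LMUL=1 is exactly VLMAX (128 / 32 == 4), so the constant-AVL check below
// promotes it to the X0 (VLMAX) form, PseudoVSETVLIX0.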
500 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
501 if (auto VLEN = Subtarget->getRealVLen())
502 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
503 VLMax = true;
504 }
505 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
506 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
507 Opcode = RISCV::PseudoVSETVLIX0;
508 } else {
509 VLOperand = Node->getOperand(1);
510
511 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
512 uint64_t AVL = C->getZExtValue();
513 if (isUInt<5>(AVL)) {
514 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
515 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
516 XLenVT, VLImm, VTypeIOp));
517 return;
518 }
519 }
520 }
521
522 ReplaceNode(Node,
523 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
524}
525
526bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
527 MVT VT = Node->getSimpleValueType(0);
528 unsigned Opcode = Node->getOpcode();
529 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
530 "Unexpected opcode");
531 SDLoc DL(Node);
532
533 // For operations of the form (x << C1) op C2, check if we can use
534 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
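// Illustrative example: (or (shl X, 4), 0x3450) would need 0x3450 to be
// materialized in a register (it is not a simm12), but rewritten as
// (shl (ori X, 0x345), 4) the immediate fits directly in ORI.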
535 SDValue N0 = Node->getOperand(0);
536 SDValue N1 = Node->getOperand(1);
537
538 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
539 if (!Cst)
540 return false;
541
542 int64_t Val = Cst->getSExtValue();
543
544 // Check if immediate can already use ANDI/ORI/XORI.
545 if (isInt<12>(Val))
546 return false;
547
548 SDValue Shift = N0;
549
550 // If Val is simm32 and we have a sext_inreg from i32, then the binop
551 // produces at least 33 sign bits. We can peek through the sext_inreg and use
552 // a SLLIW at the end.
553 bool SignExt = false;
554 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
555 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
556 SignExt = true;
557 Shift = N0.getOperand(0);
558 }
559
560 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
561 return false;
562
563 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
564 if (!ShlCst)
565 return false;
566
567 uint64_t ShAmt = ShlCst->getZExtValue();
568
569 // Make sure that we don't change the operation by removing bits.
570 // This only matters for OR and XOR, AND is unaffected.
571 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
572 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
573 return false;
574
575 int64_t ShiftedVal = Val >> ShAmt;
576 if (!isInt<12>(ShiftedVal))
577 return false;
578
579 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
580 if (SignExt && ShAmt >= 32)
581 return false;
582
583 // Ok, we can reorder to get a smaller immediate.
584 unsigned BinOpc;
585 switch (Opcode) {
586 default: llvm_unreachable("Unexpected opcode");
587 case ISD::AND: BinOpc = RISCV::ANDI; break;
588 case ISD::OR: BinOpc = RISCV::ORI; break;
589 case ISD::XOR: BinOpc = RISCV::XORI; break;
590 }
591
592 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
593
594 SDNode *BinOp = CurDAG->getMachineNode(
595 BinOpc, DL, VT, Shift.getOperand(0),
596 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
597 SDNode *SLLI =
598 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
599 CurDAG->getTargetConstant(ShAmt, DL, VT));
600 ReplaceNode(Node, SLLI);
601 return true;
602}
603
604bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
605 // Only supported with XTHeadBb at the moment.
606 if (!Subtarget->hasVendorXTHeadBb())
607 return false;
608
609 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
610 if (!N1C)
611 return false;
612
613 SDValue N0 = Node->getOperand(0);
614 if (!N0.hasOneUse())
615 return false;
616
617 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
618 MVT VT) {
619 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
620 CurDAG->getTargetConstant(Msb, DL, VT),
621 CurDAG->getTargetConstant(Lsb, DL, VT));
622 };
623
624 SDLoc DL(Node);
625 MVT VT = Node->getSimpleValueType(0);
626 const unsigned RightShAmt = N1C->getZExtValue();
627
628 // Transform (sra (shl X, C1) C2) with C1 < C2
629 // -> (TH.EXT X, msb, lsb)
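// Illustrative example (XLEN=64): (sra (shl X, 48), 52) keeps bits [15:4] of
// X sign-extended from bit 15, so it becomes (TH.EXT X, 15, 4) with
// msb = 64 - 48 - 1 and lsb = 52 - 48.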
630 if (N0.getOpcode() == ISD::SHL) {
631 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
632 if (!N01C)
633 return false;
634
635 const unsigned LeftShAmt = N01C->getZExtValue();
636 // Make sure that this is a bitfield extraction (i.e., the shift-right
637 // amount cannot be less than the left-shift).
638 if (LeftShAmt > RightShAmt)
639 return false;
640
641 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
642 const unsigned Msb = MsbPlusOne - 1;
643 const unsigned Lsb = RightShAmt - LeftShAmt;
644
645 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
646 ReplaceNode(Node, TH_EXT);
647 return true;
648 }
649
650 // Transform (sra (sext_inreg X, _), C) ->
651 // (TH.EXT X, msb, lsb)
652 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
653 unsigned ExtSize =
654 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
655
656 // ExtSize of 32 should use sraiw via tablegen pattern.
657 if (ExtSize == 32)
658 return false;
659
660 const unsigned Msb = ExtSize - 1;
661 const unsigned Lsb = RightShAmt;
662
663 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
664 ReplaceNode(Node, TH_EXT);
665 return true;
666 }
667
668 return false;
669}
670
671bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
672 // Target does not support indexed loads.
673 if (!Subtarget->hasVendorXTHeadMemIdx())
674 return false;
675
676 LoadSDNode *Ld = cast<LoadSDNode>(Node);
677 ISD::MemIndexedMode AM = Ld->getAddressingMode();
678 if (AM == ISD::UNINDEXED)
679 return false;
680
681 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
682 if (!C)
683 return false;
684
685 EVT LoadVT = Ld->getMemoryVT();
686 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
687 "Unexpected addressing mode");
688 bool IsPre = AM == ISD::PRE_INC;
689 bool IsPost = AM == ISD::POST_INC;
690 int64_t Offset = C->getSExtValue();
691
692 // The constants that can be encoded in the THeadMemIdx instructions
693 // are of the form (sign_extend(imm5) << imm2).
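// Illustrative example: Offset = 48 is encoded as imm5 = 12 with imm2 = 2
// (12 << 2 == 48), while Offset = 7 uses imm5 = 7 and imm2 = 0.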
694 unsigned Shift;
695 for (Shift = 0; Shift < 4; Shift++)
696 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
697 break;
698
699 // Constant cannot be encoded.
700 if (Shift == 4)
701 return false;
702
703 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
704 unsigned Opcode;
705 if (LoadVT == MVT::i8 && IsPre)
706 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
707 else if (LoadVT == MVT::i8 && IsPost)
708 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
709 else if (LoadVT == MVT::i16 && IsPre)
710 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
711 else if (LoadVT == MVT::i16 && IsPost)
712 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
713 else if (LoadVT == MVT::i32 && IsPre)
714 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
715 else if (LoadVT == MVT::i32 && IsPost)
716 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
717 else if (LoadVT == MVT::i64 && IsPre)
718 Opcode = RISCV::TH_LDIB;
719 else if (LoadVT == MVT::i64 && IsPost)
720 Opcode = RISCV::TH_LDIA;
721 else
722 return false;
723
724 EVT Ty = Ld->getOffset().getValueType();
725 SDValue Ops[] = {
726 Ld->getBasePtr(),
727 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
728 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
729 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
730 Ld->getValueType(1), MVT::Other, Ops);
731
732 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
733 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
734
735 ReplaceNode(Node, New);
736
737 return true;
738}
739
740void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
741 if (!Subtarget->hasVInstructions())
742 return;
743
744 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
745
746 SDLoc DL(Node);
747 unsigned IntNo = Node->getConstantOperandVal(1);
748
749 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
750 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
751 "Unexpected vsetvli intrinsic");
752
753 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
754 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
755 SDValue SEWOp =
756 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
757 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
758 Node->getOperand(4), Node->getOperand(5),
759 Node->getOperand(8), SEWOp,
760 Node->getOperand(0)};
761
762 unsigned Opcode;
763 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
764 switch (LMulSDNode->getSExtValue()) {
765 case 5:
766 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
767 : RISCV::PseudoVC_I_SE_MF8;
768 break;
769 case 6:
770 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
771 : RISCV::PseudoVC_I_SE_MF4;
772 break;
773 case 7:
774 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
775 : RISCV::PseudoVC_I_SE_MF2;
776 break;
777 case 0:
778 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
779 : RISCV::PseudoVC_I_SE_M1;
780 break;
781 case 1:
782 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
783 : RISCV::PseudoVC_I_SE_M2;
784 break;
785 case 2:
786 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
787 : RISCV::PseudoVC_I_SE_M4;
788 break;
789 case 3:
790 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
791 : RISCV::PseudoVC_I_SE_M8;
792 break;
793 }
794
795 ReplaceNode(Node, CurDAG->getMachineNode(
796 Opcode, DL, Node->getSimpleValueType(0), Operands));
797}
798
799static unsigned getSegInstNF(unsigned Intrinsic) {
800#define INST_NF_CASE(NAME, NF) \
801 case Intrinsic::riscv_##NAME##NF: \
802 return NF;
803#define INST_NF_CASE_MASK(NAME, NF) \
804 case Intrinsic::riscv_##NAME##NF##_mask: \
805 return NF;
806#define INST_NF_CASE_FF(NAME, NF) \
807 case Intrinsic::riscv_##NAME##NF##ff: \
808 return NF;
809#define INST_NF_CASE_FF_MASK(NAME, NF) \
810 case Intrinsic::riscv_##NAME##NF##ff_mask: \
811 return NF;
812#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
813 MACRO_NAME(NAME, 2) \
814 MACRO_NAME(NAME, 3) \
815 MACRO_NAME(NAME, 4) \
816 MACRO_NAME(NAME, 5) \
817 MACRO_NAME(NAME, 6) \
818 MACRO_NAME(NAME, 7) \
819 MACRO_NAME(NAME, 8)
820#define INST_ALL_NF_CASE(NAME) \
821 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
822 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
823#define INST_ALL_NF_CASE_WITH_FF(NAME) \
824 INST_ALL_NF_CASE(NAME) \
825 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
826 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
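// As an illustration, INST_ALL_NF_CASE(vlsseg) expands to cases for
// Intrinsic::riscv_vlsseg2 through riscv_vlsseg8 and their _mask variants,
// each returning the corresponding NF value (2..8).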
827 switch (Intrinsic) {
828 default:
829 llvm_unreachable("Unexpected segment load/store intrinsic");
830 INST_ALL_NF_CASE_WITH_FF(vlseg)
831 INST_ALL_NF_CASE(vlsseg)
832 INST_ALL_NF_CASE(vloxseg)
833 INST_ALL_NF_CASE(vluxseg)
834 INST_ALL_NF_CASE(vsseg)
835 INST_ALL_NF_CASE(vssseg)
836 INST_ALL_NF_CASE(vsoxseg)
837 INST_ALL_NF_CASE(vsuxseg)
838 }
839}
840
841void RISCVDAGToDAGISel::Select(SDNode *Node) {
842 // If we have a custom node, we have already selected.
843 if (Node->isMachineOpcode()) {
844 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
845 Node->setNodeId(-1);
846 return;
847 }
848
849 // Instruction Selection not handled by the auto-generated tablegen selection
850 // should be handled here.
851 unsigned Opcode = Node->getOpcode();
852 MVT XLenVT = Subtarget->getXLenVT();
853 SDLoc DL(Node);
854 MVT VT = Node->getSimpleValueType(0);
855
856 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
857
858 switch (Opcode) {
859 case ISD::Constant: {
860 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
861 auto *ConstNode = cast<ConstantSDNode>(Node);
862 if (ConstNode->isZero()) {
863 SDValue New =
864 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
865 ReplaceNode(Node, New.getNode());
866 return;
867 }
868 int64_t Imm = ConstNode->getSExtValue();
869 // If only the lower 8 bits are used, try to convert this to a simm6 by
870 // sign-extending bit 7. This is neutral without the C extension, and
871 // allows C.LI to be used if C is present.
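// Illustrative example: Imm = 0xF0 with only byte-wide users becomes
// SignExtend64<8>(0xF0) = -16, which fits in a simm6 and can be emitted as
// c.li when the C extension is available.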
872 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
873 Imm = SignExtend64<8>(Imm);
874 // If the upper XLen-16 bits are not used, try to convert this to a simm12
875 // by sign extending bit 15.
876 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
877 hasAllHUsers(Node))
878 Imm = SignExtend64<16>(Imm);
879 // If the upper 32 bits are not used, try to convert this into a simm32 by
880 // sign-extending bit 31.
881 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
882 Imm = SignExtend64<32>(Imm);
883
884 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
885 return;
886 }
887 case ISD::ConstantFP: {
888 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
889
890 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
891 SDValue Imm;
892 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
893 // create an integer immediate.
894 if (APF.isPosZero() || NegZeroF64)
895 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
896 else
897 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
898 *Subtarget);
899
900 bool HasZdinx = Subtarget->hasStdExtZdinx();
901 bool Is64Bit = Subtarget->is64Bit();
902 unsigned Opc;
903 switch (VT.SimpleTy) {
904 default:
905 llvm_unreachable("Unexpected size");
906 case MVT::bf16:
907 assert(Subtarget->hasStdExtZfbfmin());
908 Opc = RISCV::FMV_H_X;
909 break;
910 case MVT::f16:
911 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
912 break;
913 case MVT::f32:
914 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
915 break;
916 case MVT::f64:
917 // For RV32, we can't move from a GPR, we need to convert instead. This
918 // should only happen for +0.0 and -0.0.
919 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
920 if (Is64Bit)
921 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
922 else
923 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
924 break;
925 }
926
927 SDNode *Res;
928 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
929 Res =
930 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
931 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
932 Res =
933 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
934 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
935 Res = CurDAG->getMachineNode(
936 Opc, DL, VT, Imm,
937 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
938 else
939 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
940
941 // For f64 -0.0, we need to insert a fneg.d idiom.
942 if (NegZeroF64) {
943 Opc = RISCV::FSGNJN_D;
944 if (HasZdinx)
945 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
946 Res =
947 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
948 }
949
950 ReplaceNode(Node, Res);
951 return;
952 }
953 case RISCVISD::BuildGPRPair:
954 case RISCVISD::BuildPairF64: {
955 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
956 break;
957
958 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
959 "BuildPairF64 only handled here on rv32i_zdinx");
960
961 SDValue Ops[] = {
962 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
963 Node->getOperand(0),
964 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
965 Node->getOperand(1),
966 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
967
968 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
969 ReplaceNode(Node, N);
970 return;
971 }
972 case RISCVISD::SplitGPRPair:
973 case RISCVISD::SplitF64: {
974 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
975 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
976 "SplitF64 only handled here on rv32i_zdinx");
977
978 if (!SDValue(Node, 0).use_empty()) {
979 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
980 Node->getValueType(0),
981 Node->getOperand(0));
982 ReplaceUses(SDValue(Node, 0), Lo);
983 }
984
985 if (!SDValue(Node, 1).use_empty()) {
986 SDValue Hi = CurDAG->getTargetExtractSubreg(
987 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
988 ReplaceUses(SDValue(Node, 1), Hi);
989 }
990
991 CurDAG->RemoveDeadNode(Node);
992 return;
993 }
994
995 assert(Opcode != RISCVISD::SplitGPRPair &&
996 "SplitGPRPair should already be handled");
997
998 if (!Subtarget->hasStdExtZfa())
999 break;
1000 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1001 "Unexpected subtarget");
1002
1003 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1004 if (!SDValue(Node, 0).use_empty()) {
1005 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1006 Node->getOperand(0));
1007 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1008 }
1009 if (!SDValue(Node, 1).use_empty()) {
1010 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1011 Node->getOperand(0));
1012 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1013 }
1014
1015 CurDAG->RemoveDeadNode(Node);
1016 return;
1017 }
1018 case ISD::SHL: {
1019 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1020 if (!N1C)
1021 break;
1022 SDValue N0 = Node->getOperand(0);
1023 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1024 !isa<ConstantSDNode>(N0.getOperand(1)))
1025 break;
1026 unsigned ShAmt = N1C->getZExtValue();
1027 uint64_t Mask = N0.getConstantOperandVal(1);
1028
1029 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1030 unsigned XLen = Subtarget->getXLen();
1031 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1032 unsigned TrailingZeros = llvm::countr_zero(Mask);
1033 if (TrailingZeros > 0 && LeadingZeros == 32) {
1034 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1035 // where C2 has 32 leading zeros and C3 trailing zeros.
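// Illustrative example (RV64): C2 = 0xFFFFFF00 and C = 4 gives C3 = 8, so
// (shl (and X, 0xFFFFFF00), 4) becomes (slli (srliw X, 8), 12).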
1036 SDNode *SRLIW = CurDAG->getMachineNode(
1037 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1038 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1039 SDNode *SLLI = CurDAG->getMachineNode(
1040 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1041 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1042 ReplaceNode(Node, SLLI);
1043 return;
1044 }
1045 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1046 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1047 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1048 // where C2 has C4 leading zeros and no trailing zeros.
1049 // This is profitable if the "and" was to be lowered to
1050 // (srli (slli X, C4), C4) and not (andi X, C2).
1051 // For "LeadingZeros == 32":
1052 // - with Zba it's just (slli.uw X, C)
1053 // - without Zba a tablegen pattern applies the very same
1054 // transform as we would have done here
1055 SDNode *SLLI = CurDAG->getMachineNode(
1056 RISCV::SLLI, DL, VT, N0->getOperand(0),
1057 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1058 SDNode *SRLI = CurDAG->getMachineNode(
1059 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1060 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1061 ReplaceNode(Node, SRLI);
1062 return;
1063 }
1064 }
1065 break;
1066 }
1067 case ISD::SRL: {
1068 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1069 if (!N1C)
1070 break;
1071 SDValue N0 = Node->getOperand(0);
1072 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1073 break;
1074 unsigned ShAmt = N1C->getZExtValue();
1075 uint64_t Mask = N0.getConstantOperandVal(1);
1076
1077 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1078 // 32 leading zeros and C3 trailing zeros.
1079 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1080 unsigned XLen = Subtarget->getXLen();
1081 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1082 unsigned TrailingZeros = llvm::countr_zero(Mask);
1083 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1084 SDNode *SRLIW = CurDAG->getMachineNode(
1085 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1086 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1087 SDNode *SLLI = CurDAG->getMachineNode(
1088 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1089 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1090 ReplaceNode(Node, SLLI);
1091 return;
1092 }
1093 }
1094
1095 // Optimize (srl (and X, C2), C) ->
1096 // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1097 // Where C2 is a mask with C3 trailing ones.
1098 // Taking into account that the C2 may have had lower bits unset by
1099 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1100 // This pattern occurs when type legalizing right shifts for types with
1101 // less than XLen bits.
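// Illustrative example (RV64, without XTHeadBb): for (srl (and X, 0xFFFF), 8)
// the mask has 16 trailing ones, so this becomes (srli (slli X, 48), 56) and
// the 0xFFFF constant never has to be materialized.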
1102 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1103 if (!isMask_64(Mask))
1104 break;
1105 unsigned TrailingOnes = llvm::countr_one(Mask);
1106 if (ShAmt >= TrailingOnes)
1107 break;
1108 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1109 if (TrailingOnes == 32) {
1110 SDNode *SRLI = CurDAG->getMachineNode(
1111 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1112 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1113 ReplaceNode(Node, SRLI);
1114 return;
1115 }
1116
1117 // Only do the remaining transforms if the AND has one use.
1118 if (!N0.hasOneUse())
1119 break;
1120
1121 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1122 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1123 SDNode *BEXTI = CurDAG->getMachineNode(
1124 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1125 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1126 ReplaceNode(Node, BEXTI);
1127 return;
1128 }
1129
1130 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1131 if (Subtarget->hasVendorXTHeadBb()) {
1132 SDNode *THEXTU = CurDAG->getMachineNode(
1133 RISCV::TH_EXTU, DL, VT, N0->getOperand(0),
1134 CurDAG->getTargetConstant(TrailingOnes - 1, DL, VT),
1135 CurDAG->getTargetConstant(ShAmt, DL, VT));
1136 ReplaceNode(Node, THEXTU);
1137 return;
1138 }
1139
1140 SDNode *SLLI =
1141 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1142 CurDAG->getTargetConstant(LShAmt, DL, VT));
1143 SDNode *SRLI = CurDAG->getMachineNode(
1144 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1145 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1146 ReplaceNode(Node, SRLI);
1147 return;
1148 }
1149 case ISD::SRA: {
1150 if (trySignedBitfieldExtract(Node))
1151 return;
1152
1153 // Optimize (sra (sext_inreg X, i16), C) ->
1154 // (srai (slli X, (XLen-16)), (XLen-16) + C)
1155 // And (sra (sext_inreg X, i8), C) ->
1156 // (srai (slli X, (XLen-8)), (XLen-8) + C)
1157 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1158 // This transform matches the code we get without Zbb. The shifts are more
1159 // compressible, and this can help expose CSE opportunities in the sdiv by
1160 // constant optimization.
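// Illustrative example (RV64): (sra (sext_inreg X, i8), 3) becomes
// (srai (slli X, 56), 59): shift bit 7 up to bit 63, then arithmetic-shift
// back down, matching the lowering used when Zbb is not available.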
1161 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1162 if (!N1C)
1163 break;
1164 SDValue N0 = Node->getOperand(0);
1165 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1166 break;
1167 unsigned ShAmt = N1C->getZExtValue();
1168 unsigned ExtSize =
1169 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1170 // ExtSize of 32 should use sraiw via tablegen pattern.
1171 if (ExtSize >= 32 || ShAmt >= ExtSize)
1172 break;
1173 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1174 SDNode *SLLI =
1175 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1176 CurDAG->getTargetConstant(LShAmt, DL, VT));
1177 SDNode *SRAI = CurDAG->getMachineNode(
1178 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1179 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1180 ReplaceNode(Node, SRAI);
1181 return;
1182 }
1183 case ISD::OR:
1184 case ISD::XOR:
1185 if (tryShrinkShlLogicImm(Node))
1186 return;
1187
1188 break;
1189 case ISD::AND: {
1190 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1191 if (!N1C)
1192 break;
1193
1194 SDValue N0 = Node->getOperand(0);
1195
1196 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1197 SDValue X, unsigned Msb,
1198 unsigned Lsb) {
1199 if (!Subtarget->hasVendorXTHeadBb())
1200 return false;
1201
1202 SDNode *TH_EXTU = CurDAG->getMachineNode(
1203 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1204 CurDAG->getTargetConstant(Lsb, DL, VT));
1205 ReplaceNode(Node, TH_EXTU);
1206 return true;
1207 };
1208
1209 bool LeftShift = N0.getOpcode() == ISD::SHL;
1210 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1211 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1212 if (!C)
1213 break;
1214 unsigned C2 = C->getZExtValue();
1215 unsigned XLen = Subtarget->getXLen();
1216 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1217
1218 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1219 // shift pair might offer more compression opportunities.
1220 // TODO: We could check for C extension here, but we don't have many lit
1221 // tests with the C extension enabled so not checking gets better
1222 // coverage.
1223 // TODO: What if ANDI is faster than the shift?
1224 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1225
1226 uint64_t C1 = N1C->getZExtValue();
1227
1228 // Clear irrelevant bits in the mask.
1229 if (LeftShift)
1230 C1 &= maskTrailingZeros<uint64_t>(C2);
1231 else
1232 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1233
1234 // Some transforms should only be done if the shift has a single use or
1235 // the AND would become (srli (slli X, 32), 32)
1236 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1237
1238 SDValue X = N0.getOperand(0);
1239
1240 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1241 // with c3 leading zeros.
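// Illustrative example (RV64, single-use shift, no bitfield extract
// available): for (and (srl X, 4), 0xFFF), c3 = 52, so this becomes
// (srli (slli X, 48), 52) and the non-simm12 mask 0xFFF is never materialized.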
1242 if (!LeftShift && isMask_64(C1)) {
1243 unsigned Leading = XLen - llvm::bit_width(C1);
1244 if (C2 < Leading) {
1245 // If the number of leading zeros is C2+32 this can be SRLIW.
1246 if (C2 + 32 == Leading) {
1247 SDNode *SRLIW = CurDAG->getMachineNode(
1248 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1249 ReplaceNode(Node, SRLIW);
1250 return;
1251 }
1252
1253 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1254 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1255 //
1256 // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
1257 // legalized and goes through DAG combine.
1258 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1259 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1260 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1261 SDNode *SRAIW =
1262 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1263 CurDAG->getTargetConstant(31, DL, VT));
1264 SDNode *SRLIW = CurDAG->getMachineNode(
1265 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1266 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1267 ReplaceNode(Node, SRLIW);
1268 return;
1269 }
1270
1271 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1272 // available.
1273 // Transform (and (srl x, C2), C1)
1274 // -> (<bfextract> x, msb, lsb)
1275 //
1276 // Make sure to keep this below the SRLIW cases, as we always want to
1277 // prefer the more common instruction.
1278 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1279 const unsigned Lsb = C2;
1280 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1281 return;
1282
1283 // (srli (slli x, c3-c2), c3).
1284 // Skip if we could use (zext.w (sraiw X, C2)).
1285 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1288 // Also Skip if we can use bexti or th.tst.
1289 Skip |= HasBitTest && Leading == XLen - 1;
1290 if (OneUseOrZExtW && !Skip) {
1291 SDNode *SLLI = CurDAG->getMachineNode(
1292 RISCV::SLLI, DL, VT, X,
1293 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1294 SDNode *SRLI = CurDAG->getMachineNode(
1295 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1296 CurDAG->getTargetConstant(Leading, DL, VT));
1297 ReplaceNode(Node, SRLI);
1298 return;
1299 }
1300 }
1301 }
1302
1303 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1304 // shifted by c2 bits with c3 leading zeros.
1305 if (LeftShift && isShiftedMask_64(C1)) {
1306 unsigned Leading = XLen - llvm::bit_width(C1);
1307
1308 if (C2 + Leading < XLen &&
1309 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1310 // Use slli.uw when possible.
1311 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1312 SDNode *SLLI_UW =
1313 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1314 CurDAG->getTargetConstant(C2, DL, VT));
1315 ReplaceNode(Node, SLLI_UW);
1316 return;
1317 }
1318
1319 // (srli (slli x, c2+c3), c3)
1320 if (OneUseOrZExtW && !IsCANDI) {
1321 SDNode *SLLI = CurDAG->getMachineNode(
1322 RISCV::SLLI, DL, VT, X,
1323 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1324 SDNode *SRLI = CurDAG->getMachineNode(
1325 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1326 CurDAG->getTargetConstant(Leading, DL, VT));
1327 ReplaceNode(Node, SRLI);
1328 return;
1329 }
1330 }
1331 }
1332
1333 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1334 // shifted mask with c2 leading zeros and c3 trailing zeros.
1335 if (!LeftShift && isShiftedMask_64(C1)) {
1336 unsigned Leading = XLen - llvm::bit_width(C1);
1337 unsigned Trailing = llvm::countr_zero(C1);
1338 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1339 !IsCANDI) {
1340 unsigned SrliOpc = RISCV::SRLI;
1341 // If the input is zexti32 we should use SRLIW.
1342 if (X.getOpcode() == ISD::AND &&
1343 isa<ConstantSDNode>(X.getOperand(1)) &&
1344 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1345 SrliOpc = RISCV::SRLIW;
1346 X = X.getOperand(0);
1347 }
1348 SDNode *SRLI = CurDAG->getMachineNode(
1349 SrliOpc, DL, VT, X,
1350 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1351 SDNode *SLLI = CurDAG->getMachineNode(
1352 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1353 CurDAG->getTargetConstant(Trailing, DL, VT));
1354 ReplaceNode(Node, SLLI);
1355 return;
1356 }
1357 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1358 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1359 OneUseOrZExtW && !IsCANDI) {
1360 SDNode *SRLIW = CurDAG->getMachineNode(
1361 RISCV::SRLIW, DL, VT, X,
1362 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1363 SDNode *SLLI = CurDAG->getMachineNode(
1364 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1365 CurDAG->getTargetConstant(Trailing, DL, VT));
1366 ReplaceNode(Node, SLLI);
1367 return;
1368 }
1369 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1370 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1371 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1372 SDNode *SRLI = CurDAG->getMachineNode(
1373 RISCV::SRLI, DL, VT, X,
1374 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1375 SDNode *SLLI_UW = CurDAG->getMachineNode(
1376 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1377 CurDAG->getTargetConstant(Trailing, DL, VT));
1378 ReplaceNode(Node, SLLI_UW);
1379 return;
1380 }
1381 }
1382
1383 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1384 // shifted mask with no leading zeros and c3 trailing zeros.
1385 if (LeftShift && isShiftedMask_64(C1)) {
1386 unsigned Leading = XLen - llvm::bit_width(C1);
1387 unsigned Trailing = llvm::countr_zero(C1);
1388 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1389 SDNode *SRLI = CurDAG->getMachineNode(
1390 RISCV::SRLI, DL, VT, X,
1391 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1392 SDNode *SLLI = CurDAG->getMachineNode(
1393 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1394 CurDAG->getTargetConstant(Trailing, DL, VT));
1395 ReplaceNode(Node, SLLI);
1396 return;
1397 }
1398 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1399 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1400 SDNode *SRLIW = CurDAG->getMachineNode(
1401 RISCV::SRLIW, DL, VT, X,
1402 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1403 SDNode *SLLI = CurDAG->getMachineNode(
1404 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1405 CurDAG->getTargetConstant(Trailing, DL, VT));
1406 ReplaceNode(Node, SLLI);
1407 return;
1408 }
1409
1410 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1411 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1412 Subtarget->hasStdExtZba()) {
1413 SDNode *SRLI = CurDAG->getMachineNode(
1414 RISCV::SRLI, DL, VT, X,
1415 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1416 SDNode *SLLI_UW = CurDAG->getMachineNode(
1417 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1418 CurDAG->getTargetConstant(Trailing, DL, VT));
1419 ReplaceNode(Node, SLLI_UW);
1420 return;
1421 }
1422 }
1423 }
1424
1425 const uint64_t C1 = N1C->getZExtValue();
1426
1427 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1428 N0.hasOneUse()) {
1429 unsigned C2 = N0.getConstantOperandVal(1);
1430 unsigned XLen = Subtarget->getXLen();
1431 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1432
1433 SDValue X = N0.getOperand(0);
1434
1435 // Prefer SRAIW + ANDI when possible.
1436 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1437 X.getOpcode() == ISD::SHL &&
1438 isa<ConstantSDNode>(X.getOperand(1)) &&
1439 X.getConstantOperandVal(1) == 32;
1440 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1441 // mask with c3 leading zeros and c2 is larger than c3.
1442 if (isMask_64(C1) && !Skip) {
1443 unsigned Leading = XLen - llvm::bit_width(C1);
1444 if (C2 > Leading) {
1445 SDNode *SRAI = CurDAG->getMachineNode(
1446 RISCV::SRAI, DL, VT, X,
1447 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1448 SDNode *SRLI = CurDAG->getMachineNode(
1449 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1450 CurDAG->getTargetConstant(Leading, DL, VT));
1451 ReplaceNode(Node, SRLI);
1452 return;
1453 }
1454 }
1455
1456 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1457 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1458 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1459 if (isShiftedMask_64(C1) && !Skip) {
1460 unsigned Leading = XLen - llvm::bit_width(C1);
1461 unsigned Trailing = llvm::countr_zero(C1);
1462 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1463 SDNode *SRAI = CurDAG->getMachineNode(
1464 RISCV::SRAI, DL, VT, N0.getOperand(0),
1465 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1466 SDNode *SRLI = CurDAG->getMachineNode(
1467 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1468 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1469 SDNode *SLLI = CurDAG->getMachineNode(
1470 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1471 CurDAG->getTargetConstant(Trailing, DL, VT));
1472 ReplaceNode(Node, SLLI);
1473 return;
1474 }
1475 }
1476 }
1477
1478 // If C1 masks off the upper bits only (but can't be formed as an
1479 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1480 // available.
1481 // Transform (and x, C1)
1482 // -> (<bfextract> x, msb, lsb)
1483 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1484 const unsigned Msb = llvm::bit_width(C1) - 1;
1485 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1486 return;
1487 }
1488
1489 if (tryShrinkShlLogicImm(Node))
1490 return;
1491
1492 break;
1493 }
1494 case ISD::MUL: {
1495 // Special case for calculating (mul (and X, C2), C1) where the full product
1496 // fits in XLen bits. We can shift X left by the number of leading zeros in
1497 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1498 // product has XLen trailing zeros, putting it in the output of MULHU. This
1499 // can avoid materializing a constant in a register for C2.
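// Illustrative example (RV64, without Zbb or XTHeadBb): for
// (mul (and X, 0xFFFF), 100) the mask has 48 leading zeros, so this becomes
// (mulhu (slli X, 48), 100 << 16): the full product lands in bits [127:64],
// and MULHU returns (X & 0xFFFF) * 100 without materializing 0xFFFF.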
1500
1501 // RHS should be a constant.
1502 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1503 if (!N1C || !N1C->hasOneUse())
1504 break;
1505
1506 // LHS should be an AND with constant.
1507 SDValue N0 = Node->getOperand(0);
1508 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1509 break;
1510
1511 uint64_t C2 = N0.getConstantOperandVal(1);
1512
1513 // Constant should be a mask.
1514 if (!isMask_64(C2))
1515 break;
1516
1517 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1518 // multiple users or the constant is a simm12. This prevents inserting a
1519 // shift and still having uses of the AND/ZEXT. Shifting a simm12 will likely
1520 // make it more costly to materialize. Otherwise, using a SLLI might allow
1521 // it to be compressed.
1522 bool IsANDIOrZExt =
1523 isInt<12>(C2) ||
1524 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1525 // With XTHeadBb, we can use TH.EXTU.
1526 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1527 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1528 break;
1529 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1530 // the constant is a simm32.
1531 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1532 // With XTHeadBb, we can use TH.EXTU.
1533 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1534 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1535 break;
1536
1537 // We need to shift left the AND input and C1 by a total of XLen bits.
1538
1539 // How far left do we need to shift the AND input?
1540 unsigned XLen = Subtarget->getXLen();
1541 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1542
1543 // The constant gets shifted by the remaining amount unless that would
1544 // shift bits out.
1545 uint64_t C1 = N1C->getZExtValue();
1546 unsigned ConstantShift = XLen - LeadingZeros;
1547 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1548 break;
1549
1550 uint64_t ShiftedC1 = C1 << ConstantShift;
1551 // If this is RV32, we need to sign extend the constant.
1552 if (XLen == 32)
1553 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1554
1555 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1556 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1557 SDNode *SLLI =
1558 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1559 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1560 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1561 SDValue(SLLI, 0), SDValue(Imm, 0));
1562 ReplaceNode(Node, MULHU);
1563 return;
1564 }
1565 case ISD::LOAD: {
1566 if (tryIndexedLoad(Node))
1567 return;
1568
1569 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1570 // We match a post-incrementing load here.
1571 LoadSDNode *Load = cast<LoadSDNode>(Node);
1572 if (Load->getAddressingMode() != ISD::POST_INC)
1573 break;
1574
1575 SDValue Chain = Node->getOperand(0);
1576 SDValue Base = Node->getOperand(1);
1577 SDValue Offset = Node->getOperand(2);
1578
1579 bool Simm12 = false;
1580 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1581
1582 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1583 int ConstantVal = ConstantOffset->getSExtValue();
1584 Simm12 = isInt<12>(ConstantVal);
1585 if (Simm12)
1586 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1587 Offset.getValueType());
1588 }
1589
1590 unsigned Opcode = 0;
1591 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1592 case MVT::i8:
1593 if (Simm12 && SignExtend)
1594 Opcode = RISCV::CV_LB_ri_inc;
1595 else if (Simm12 && !SignExtend)
1596 Opcode = RISCV::CV_LBU_ri_inc;
1597 else if (!Simm12 && SignExtend)
1598 Opcode = RISCV::CV_LB_rr_inc;
1599 else
1600 Opcode = RISCV::CV_LBU_rr_inc;
1601 break;
1602 case MVT::i16:
1603 if (Simm12 && SignExtend)
1604 Opcode = RISCV::CV_LH_ri_inc;
1605 else if (Simm12 && !SignExtend)
1606 Opcode = RISCV::CV_LHU_ri_inc;
1607 else if (!Simm12 && SignExtend)
1608 Opcode = RISCV::CV_LH_rr_inc;
1609 else
1610 Opcode = RISCV::CV_LHU_rr_inc;
1611 break;
1612 case MVT::i32:
1613 if (Simm12)
1614 Opcode = RISCV::CV_LW_ri_inc;
1615 else
1616 Opcode = RISCV::CV_LW_rr_inc;
1617 break;
1618 default:
1619 break;
1620 }
1621 if (!Opcode)
1622 break;
1623
1624 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1625 Chain.getSimpleValueType(), Base,
1626 Offset, Chain));
1627 return;
1628 }
1629 break;
1630 }
1631 case ISD::INTRINSIC_WO_CHAIN: {
1632 unsigned IntNo = Node->getConstantOperandVal(0);
1633 switch (IntNo) {
1634 // By default we do not custom select any intrinsic.
1635 default:
1636 break;
1637 case Intrinsic::riscv_vmsgeu:
1638 case Intrinsic::riscv_vmsge: {
1639 SDValue Src1 = Node->getOperand(1);
1640 SDValue Src2 = Node->getOperand(2);
1641 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1642 bool IsCmpConstant = false;
1643 bool IsCmpMinimum = false;
1644 // Only custom select scalar second operand.
1645 if (Src2.getValueType() != XLenVT)
1646 break;
1647 // Small constants are handled with patterns.
1648 int64_t CVal = 0;
1649 MVT Src1VT = Src1.getSimpleValueType();
1650 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1651 IsCmpConstant = true;
1652 CVal = C->getSExtValue();
1653 if (CVal >= -15 && CVal <= 16) {
1654 if (!IsUnsigned || CVal != 0)
1655 break;
1656 IsCmpMinimum = true;
1657 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1658 Src1VT.getScalarSizeInBits())
1659 .getSExtValue()) {
1660 IsCmpMinimum = true;
1661 }
1662 }
1663 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1664 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1665 default:
1666 llvm_unreachable("Unexpected LMUL!");
1667#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1668 case RISCVII::VLMUL::lmulenum: \
1669 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1670 : RISCV::PseudoVMSLT_VX_##suffix; \
1671 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1672 : RISCV::PseudoVMSGT_VX_##suffix; \
1673 break;
1674 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1675 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1676 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1677 CASE_VMSLT_OPCODES(LMUL_1, M1)
1678 CASE_VMSLT_OPCODES(LMUL_2, M2)
1679 CASE_VMSLT_OPCODES(LMUL_4, M4)
1680 CASE_VMSLT_OPCODES(LMUL_8, M8)
1681#undef CASE_VMSLT_OPCODES
1682 }
1683 // Mask operations use the LMUL from the mask type.
1684 switch (RISCVTargetLowering::getLMUL(VT)) {
1685 default:
1686 llvm_unreachable("Unexpected LMUL!");
1687#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1688 case RISCVII::VLMUL::lmulenum: \
1689 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1690 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1691 break;
1692 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1693 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1694 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1695 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1696 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1697 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1698 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1699#undef CASE_VMNAND_VMSET_OPCODES
1700 }
1701    SDValue SEW = CurDAG->getTargetConstant(
1702        Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1703 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1704 SDValue VL;
1705 selectVLOp(Node->getOperand(3), VL);
1706
1707 // If vmsge(u) with minimum value, expand it to vmset.
1708 if (IsCmpMinimum) {
1709 ReplaceNode(Node,
1710 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1711 return;
1712 }
1713
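      // With a constant scalar operand, vmsge(u)(va, c) can be selected
      // directly as vmsgt(u)(va, c-1).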
1714 if (IsCmpConstant) {
1715 SDValue Imm =
1716 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1717
1718 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1719 {Src1, Imm, VL, SEW}));
1720 return;
1721 }
1722
1723 // Expand to
1724 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
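      // (va >= x) is the negation of (va < x), so compute vmslt and invert
      // the resulting mask with vmnand.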
1725 SDValue Cmp = SDValue(
1726 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1727 0);
1728 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1729 {Cmp, Cmp, VL, MaskSEW}));
1730 return;
1731 }
1732 case Intrinsic::riscv_vmsgeu_mask:
1733 case Intrinsic::riscv_vmsge_mask: {
1734 SDValue Src1 = Node->getOperand(2);
1735 SDValue Src2 = Node->getOperand(3);
1736 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1737 bool IsCmpConstant = false;
1738 bool IsCmpMinimum = false;
1739 // Only custom select scalar second operand.
1740 if (Src2.getValueType() != XLenVT)
1741 break;
1742 // Small constants are handled with patterns.
1743 MVT Src1VT = Src1.getSimpleValueType();
1744 int64_t CVal = 0;
1745 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1746 IsCmpConstant = true;
1747 CVal = C->getSExtValue();
1748 if (CVal >= -15 && CVal <= 16) {
1749 if (!IsUnsigned || CVal != 0)
1750 break;
1751 IsCmpMinimum = true;
1752 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1753 Src1VT.getScalarSizeInBits())
1754 .getSExtValue()) {
1755 IsCmpMinimum = true;
1756 }
1757 }
1758 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1759 VMOROpcode, VMSGTMaskOpcode;
1760 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1761 default:
1762 llvm_unreachable("Unexpected LMUL!");
1763#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1764 case RISCVII::VLMUL::lmulenum: \
1765 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1766 : RISCV::PseudoVMSLT_VX_##suffix; \
1767 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1768 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1769 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
1770 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
1771 break;
1772 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1773 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1774 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1775 CASE_VMSLT_OPCODES(LMUL_1, M1)
1776 CASE_VMSLT_OPCODES(LMUL_2, M2)
1777 CASE_VMSLT_OPCODES(LMUL_4, M4)
1778 CASE_VMSLT_OPCODES(LMUL_8, M8)
1779#undef CASE_VMSLT_OPCODES
1780 }
1781 // Mask operations use the LMUL from the mask type.
1782 switch (RISCVTargetLowering::getLMUL(VT)) {
1783 default:
1784 llvm_unreachable("Unexpected LMUL!");
1785#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1786 case RISCVII::VLMUL::lmulenum: \
1787 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1788 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1789 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1790 break;
1791 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
1792 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
1793 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
1794      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
1795      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
1796      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
1797      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
1798#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1799 }
1800    SDValue SEW = CurDAG->getTargetConstant(
1801        Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1802 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1803 SDValue VL;
1804 selectVLOp(Node->getOperand(5), VL);
1805 SDValue MaskedOff = Node->getOperand(1);
1806 SDValue Mask = Node->getOperand(4);
1807
1808 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
1809 if (IsCmpMinimum) {
1810 // We don't need vmor if the MaskedOff and the Mask are the same
1811 // value.
1812 if (Mask == MaskedOff) {
1813 ReplaceUses(Node, Mask.getNode());
1814 return;
1815 }
1816 ReplaceNode(Node,
1817 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1818 {Mask, MaskedOff, VL, MaskSEW}));
1819 return;
1820 }
1821
1822 // If the MaskedOff value and the Mask are the same value use
1823 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1824 // This avoids needing to copy v0 to vd before starting the next sequence.
1825 if (Mask == MaskedOff) {
1826 SDValue Cmp = SDValue(
1827 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1828 0);
1829 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1830 {Mask, Cmp, VL, MaskSEW}));
1831 return;
1832 }
1833
1834 // Mask needs to be copied to V0.
1835    SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1836                                         RISCV::V0, Mask, SDValue());
1837 SDValue Glue = Chain.getValue(1);
1838 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1839
1840 if (IsCmpConstant) {
1841 SDValue Imm =
1842 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1843
1844      ReplaceNode(Node, CurDAG->getMachineNode(
1845                            VMSGTMaskOpcode, DL, VT,
1846 {MaskedOff, Src1, Imm, V0, VL, SEW, Glue}));
1847 return;
1848 }
1849
1850 // Otherwise use
1851 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1852 // The result is mask undisturbed.
1853 // We use the same instructions to emulate mask agnostic behavior, because
1854 // the agnostic result can be either undisturbed or all 1.
1855 SDValue Cmp = SDValue(
1856 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1857 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1858 0);
1859 // vmxor.mm vd, vd, v0 is used to update active value.
1860 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1861 {Cmp, Mask, VL, MaskSEW}));
1862 return;
1863 }
1864 case Intrinsic::riscv_vsetvli:
1865 case Intrinsic::riscv_vsetvlimax:
1866 return selectVSETVLI(Node);
1867 }
1868 break;
1869 }
1870  case ISD::INTRINSIC_W_CHAIN: {
1871    unsigned IntNo = Node->getConstantOperandVal(1);
1872 switch (IntNo) {
1873 // By default we do not custom select any intrinsic.
1874 default:
1875 break;
1876 case Intrinsic::riscv_vlseg2:
1877 case Intrinsic::riscv_vlseg3:
1878 case Intrinsic::riscv_vlseg4:
1879 case Intrinsic::riscv_vlseg5:
1880 case Intrinsic::riscv_vlseg6:
1881 case Intrinsic::riscv_vlseg7:
1882 case Intrinsic::riscv_vlseg8: {
1883 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1884 /*IsStrided*/ false);
1885 return;
1886 }
1887 case Intrinsic::riscv_vlseg2_mask:
1888 case Intrinsic::riscv_vlseg3_mask:
1889 case Intrinsic::riscv_vlseg4_mask:
1890 case Intrinsic::riscv_vlseg5_mask:
1891 case Intrinsic::riscv_vlseg6_mask:
1892 case Intrinsic::riscv_vlseg7_mask:
1893 case Intrinsic::riscv_vlseg8_mask: {
1894 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1895 /*IsStrided*/ false);
1896 return;
1897 }
1898 case Intrinsic::riscv_vlsseg2:
1899 case Intrinsic::riscv_vlsseg3:
1900 case Intrinsic::riscv_vlsseg4:
1901 case Intrinsic::riscv_vlsseg5:
1902 case Intrinsic::riscv_vlsseg6:
1903 case Intrinsic::riscv_vlsseg7:
1904 case Intrinsic::riscv_vlsseg8: {
1905 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1906 /*IsStrided*/ true);
1907 return;
1908 }
1909 case Intrinsic::riscv_vlsseg2_mask:
1910 case Intrinsic::riscv_vlsseg3_mask:
1911 case Intrinsic::riscv_vlsseg4_mask:
1912 case Intrinsic::riscv_vlsseg5_mask:
1913 case Intrinsic::riscv_vlsseg6_mask:
1914 case Intrinsic::riscv_vlsseg7_mask:
1915 case Intrinsic::riscv_vlsseg8_mask: {
1916 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1917 /*IsStrided*/ true);
1918 return;
1919 }
1920 case Intrinsic::riscv_vloxseg2:
1921 case Intrinsic::riscv_vloxseg3:
1922 case Intrinsic::riscv_vloxseg4:
1923 case Intrinsic::riscv_vloxseg5:
1924 case Intrinsic::riscv_vloxseg6:
1925 case Intrinsic::riscv_vloxseg7:
1926 case Intrinsic::riscv_vloxseg8:
1927 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1928 /*IsOrdered*/ true);
1929 return;
1930 case Intrinsic::riscv_vluxseg2:
1931 case Intrinsic::riscv_vluxseg3:
1932 case Intrinsic::riscv_vluxseg4:
1933 case Intrinsic::riscv_vluxseg5:
1934 case Intrinsic::riscv_vluxseg6:
1935 case Intrinsic::riscv_vluxseg7:
1936 case Intrinsic::riscv_vluxseg8:
1937 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1938 /*IsOrdered*/ false);
1939 return;
1940 case Intrinsic::riscv_vloxseg2_mask:
1941 case Intrinsic::riscv_vloxseg3_mask:
1942 case Intrinsic::riscv_vloxseg4_mask:
1943 case Intrinsic::riscv_vloxseg5_mask:
1944 case Intrinsic::riscv_vloxseg6_mask:
1945 case Intrinsic::riscv_vloxseg7_mask:
1946 case Intrinsic::riscv_vloxseg8_mask:
1947 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1948 /*IsOrdered*/ true);
1949 return;
1950 case Intrinsic::riscv_vluxseg2_mask:
1951 case Intrinsic::riscv_vluxseg3_mask:
1952 case Intrinsic::riscv_vluxseg4_mask:
1953 case Intrinsic::riscv_vluxseg5_mask:
1954 case Intrinsic::riscv_vluxseg6_mask:
1955 case Intrinsic::riscv_vluxseg7_mask:
1956 case Intrinsic::riscv_vluxseg8_mask:
1957 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1958 /*IsOrdered*/ false);
1959 return;
1960 case Intrinsic::riscv_vlseg8ff:
1961 case Intrinsic::riscv_vlseg7ff:
1962 case Intrinsic::riscv_vlseg6ff:
1963 case Intrinsic::riscv_vlseg5ff:
1964 case Intrinsic::riscv_vlseg4ff:
1965 case Intrinsic::riscv_vlseg3ff:
1966 case Intrinsic::riscv_vlseg2ff: {
1967 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
1968 return;
1969 }
1970 case Intrinsic::riscv_vlseg8ff_mask:
1971 case Intrinsic::riscv_vlseg7ff_mask:
1972 case Intrinsic::riscv_vlseg6ff_mask:
1973 case Intrinsic::riscv_vlseg5ff_mask:
1974 case Intrinsic::riscv_vlseg4ff_mask:
1975 case Intrinsic::riscv_vlseg3ff_mask:
1976 case Intrinsic::riscv_vlseg2ff_mask: {
1977 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
1978 return;
1979 }
1980 case Intrinsic::riscv_vloxei:
1981 case Intrinsic::riscv_vloxei_mask:
1982 case Intrinsic::riscv_vluxei:
1983 case Intrinsic::riscv_vluxei_mask: {
1984 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1985 IntNo == Intrinsic::riscv_vluxei_mask;
1986 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1987 IntNo == Intrinsic::riscv_vloxei_mask;
1988
1989 MVT VT = Node->getSimpleValueType(0);
1990 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1991
1992 unsigned CurOp = 2;
1993      SmallVector<SDValue, 8> Operands;
1994      Operands.push_back(Node->getOperand(CurOp++));
1995
1996 MVT IndexVT;
1997 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1998 /*IsStridedOrIndexed*/ true, Operands,
1999 /*IsLoad=*/true, &IndexVT);
2000
2001      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2002             "Element count mismatch");
2003
2004      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2005      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2006 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2007 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2008 report_fatal_error("The V extension does not support EEW=64 for index "
2009 "values when XLEN=32");
2010 }
2011 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2012 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2013 static_cast<unsigned>(IndexLMUL));
2014 MachineSDNode *Load =
2015 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2016
2017 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2018 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2019
2020 ReplaceNode(Node, Load);
2021 return;
2022 }
2023 case Intrinsic::riscv_vlm:
2024 case Intrinsic::riscv_vle:
2025 case Intrinsic::riscv_vle_mask:
2026 case Intrinsic::riscv_vlse:
2027 case Intrinsic::riscv_vlse_mask: {
2028 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2029 IntNo == Intrinsic::riscv_vlse_mask;
2030 bool IsStrided =
2031 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2032
2033 MVT VT = Node->getSimpleValueType(0);
2034 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2035
2036      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2037      // operand at the IR level. In pseudos, it has both a policy and a
2038      // passthru operand. The passthru operand is needed to track the
2039      // "tail undefined" state, and the policy is there just for
2040      // consistency - it will always be "don't care" for the
2041      // unmasked form.
2042 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2043 unsigned CurOp = 2;
2044      SmallVector<SDValue, 8> Operands;
2045      if (HasPassthruOperand)
2046 Operands.push_back(Node->getOperand(CurOp++));
2047 else {
2048 // We eagerly lower to implicit_def (instead of undef), as we
2049 // otherwise fail to select nodes such as: nxv1i1 = undef
2050 SDNode *Passthru =
2051 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2052 Operands.push_back(SDValue(Passthru, 0));
2053 }
2054 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2055 Operands, /*IsLoad=*/true);
2056
2057      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2058      const RISCV::VLEPseudo *P =
2059 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2060 static_cast<unsigned>(LMUL));
2061 MachineSDNode *Load =
2062 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2063
2064 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2065 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2066
2067 ReplaceNode(Node, Load);
2068 return;
2069 }
2070 case Intrinsic::riscv_vleff:
2071 case Intrinsic::riscv_vleff_mask: {
2072 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2073
2074 MVT VT = Node->getSimpleValueType(0);
2075 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2076
2077 unsigned CurOp = 2;
2078      SmallVector<SDValue, 8> Operands;
2079      Operands.push_back(Node->getOperand(CurOp++));
2080 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2081 /*IsStridedOrIndexed*/ false, Operands,
2082 /*IsLoad=*/true);
2083
2084      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2085      const RISCV::VLEPseudo *P =
2086 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2087 Log2SEW, static_cast<unsigned>(LMUL));
2088      MachineSDNode *Load = CurDAG->getMachineNode(
2089          P->Pseudo, DL, Node->getVTList(), Operands);
2090 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2091 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2092
2093 ReplaceNode(Node, Load);
2094 return;
2095 }
2096 }
2097 break;
2098 }
2099 case ISD::INTRINSIC_VOID: {
2100 unsigned IntNo = Node->getConstantOperandVal(1);
2101 switch (IntNo) {
2102 case Intrinsic::riscv_vsseg2:
2103 case Intrinsic::riscv_vsseg3:
2104 case Intrinsic::riscv_vsseg4:
2105 case Intrinsic::riscv_vsseg5:
2106 case Intrinsic::riscv_vsseg6:
2107 case Intrinsic::riscv_vsseg7:
2108 case Intrinsic::riscv_vsseg8: {
2109 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2110 /*IsStrided*/ false);
2111 return;
2112 }
2113 case Intrinsic::riscv_vsseg2_mask:
2114 case Intrinsic::riscv_vsseg3_mask:
2115 case Intrinsic::riscv_vsseg4_mask:
2116 case Intrinsic::riscv_vsseg5_mask:
2117 case Intrinsic::riscv_vsseg6_mask:
2118 case Intrinsic::riscv_vsseg7_mask:
2119 case Intrinsic::riscv_vsseg8_mask: {
2120 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2121 /*IsStrided*/ false);
2122 return;
2123 }
2124 case Intrinsic::riscv_vssseg2:
2125 case Intrinsic::riscv_vssseg3:
2126 case Intrinsic::riscv_vssseg4:
2127 case Intrinsic::riscv_vssseg5:
2128 case Intrinsic::riscv_vssseg6:
2129 case Intrinsic::riscv_vssseg7:
2130 case Intrinsic::riscv_vssseg8: {
2131 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2132 /*IsStrided*/ true);
2133 return;
2134 }
2135 case Intrinsic::riscv_vssseg2_mask:
2136 case Intrinsic::riscv_vssseg3_mask:
2137 case Intrinsic::riscv_vssseg4_mask:
2138 case Intrinsic::riscv_vssseg5_mask:
2139 case Intrinsic::riscv_vssseg6_mask:
2140 case Intrinsic::riscv_vssseg7_mask:
2141 case Intrinsic::riscv_vssseg8_mask: {
2142 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2143 /*IsStrided*/ true);
2144 return;
2145 }
2146 case Intrinsic::riscv_vsoxseg2:
2147 case Intrinsic::riscv_vsoxseg3:
2148 case Intrinsic::riscv_vsoxseg4:
2149 case Intrinsic::riscv_vsoxseg5:
2150 case Intrinsic::riscv_vsoxseg6:
2151 case Intrinsic::riscv_vsoxseg7:
2152 case Intrinsic::riscv_vsoxseg8:
2153 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2154 /*IsOrdered*/ true);
2155 return;
2156 case Intrinsic::riscv_vsuxseg2:
2157 case Intrinsic::riscv_vsuxseg3:
2158 case Intrinsic::riscv_vsuxseg4:
2159 case Intrinsic::riscv_vsuxseg5:
2160 case Intrinsic::riscv_vsuxseg6:
2161 case Intrinsic::riscv_vsuxseg7:
2162 case Intrinsic::riscv_vsuxseg8:
2163 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2164 /*IsOrdered*/ false);
2165 return;
2166 case Intrinsic::riscv_vsoxseg2_mask:
2167 case Intrinsic::riscv_vsoxseg3_mask:
2168 case Intrinsic::riscv_vsoxseg4_mask:
2169 case Intrinsic::riscv_vsoxseg5_mask:
2170 case Intrinsic::riscv_vsoxseg6_mask:
2171 case Intrinsic::riscv_vsoxseg7_mask:
2172 case Intrinsic::riscv_vsoxseg8_mask:
2173 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2174 /*IsOrdered*/ true);
2175 return;
2176 case Intrinsic::riscv_vsuxseg2_mask:
2177 case Intrinsic::riscv_vsuxseg3_mask:
2178 case Intrinsic::riscv_vsuxseg4_mask:
2179 case Intrinsic::riscv_vsuxseg5_mask:
2180 case Intrinsic::riscv_vsuxseg6_mask:
2181 case Intrinsic::riscv_vsuxseg7_mask:
2182 case Intrinsic::riscv_vsuxseg8_mask:
2183 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2184 /*IsOrdered*/ false);
2185 return;
2186 case Intrinsic::riscv_vsoxei:
2187 case Intrinsic::riscv_vsoxei_mask:
2188 case Intrinsic::riscv_vsuxei:
2189 case Intrinsic::riscv_vsuxei_mask: {
2190 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2191 IntNo == Intrinsic::riscv_vsuxei_mask;
2192 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2193 IntNo == Intrinsic::riscv_vsoxei_mask;
2194
2195 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2196 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2197
2198 unsigned CurOp = 2;
2199      SmallVector<SDValue, 8> Operands;
2200      Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2201
2202 MVT IndexVT;
2203 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2204 /*IsStridedOrIndexed*/ true, Operands,
2205 /*IsLoad=*/false, &IndexVT);
2206
2207      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2208             "Element count mismatch");
2209
2210      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2211      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2212 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2213 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2214 report_fatal_error("The V extension does not support EEW=64 for index "
2215 "values when XLEN=32");
2216 }
2217 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2218 IsMasked, IsOrdered, IndexLog2EEW,
2219 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2220 MachineSDNode *Store =
2221 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2222
2223 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2224 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2225
2226 ReplaceNode(Node, Store);
2227 return;
2228 }
2229 case Intrinsic::riscv_vsm:
2230 case Intrinsic::riscv_vse:
2231 case Intrinsic::riscv_vse_mask:
2232 case Intrinsic::riscv_vsse:
2233 case Intrinsic::riscv_vsse_mask: {
2234 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2235 IntNo == Intrinsic::riscv_vsse_mask;
2236 bool IsStrided =
2237 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2238
2239 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2240 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2241
2242 unsigned CurOp = 2;
2243      SmallVector<SDValue, 8> Operands;
2244      Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2245
2246 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2247 Operands);
2248
2249      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2250      const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2251 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2252 MachineSDNode *Store =
2253 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2254 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2255 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2256
2257 ReplaceNode(Node, Store);
2258 return;
2259 }
2260 case Intrinsic::riscv_sf_vc_x_se:
2261 case Intrinsic::riscv_sf_vc_i_se:
2262 selectSF_VC_X_SE(Node);
2263 return;
2264 }
2265 break;
2266 }
2267 case ISD::BITCAST: {
2268 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2269 // Just drop bitcasts between vectors if both are fixed or both are
2270 // scalable.
2271 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2272 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2273 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2274 CurDAG->RemoveDeadNode(Node);
2275 return;
2276 }
2277 break;
2278 }
2279  case ISD::INSERT_SUBVECTOR:
2280  case RISCVISD::TUPLE_INSERT: {
2281    SDValue V = Node->getOperand(0);
2282 SDValue SubV = Node->getOperand(1);
2283 SDLoc DL(SubV);
2284 auto Idx = Node->getConstantOperandVal(2);
2285 MVT SubVecVT = SubV.getSimpleValueType();
2286
2287 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2288 MVT SubVecContainerVT = SubVecVT;
2289 // Establish the correct scalable-vector types for any fixed-length type.
2290 if (SubVecVT.isFixedLengthVector()) {
2291 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2292      TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2293      [[maybe_unused]] bool ExactlyVecRegSized =
2294 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2295 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2296 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2297 .getKnownMinValue()));
2298 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2299 }
2300 MVT ContainerVT = VT;
2301 if (VT.isFixedLengthVector())
2302 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2303
2304 const auto *TRI = Subtarget->getRegisterInfo();
2305 unsigned SubRegIdx;
2306 std::tie(SubRegIdx, Idx) =
2307        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2308            ContainerVT, SubVecContainerVT, Idx, TRI);
2309
2310 // If the Idx hasn't been completely eliminated then this is a subvector
2311 // insert which doesn't naturally align to a vector register. These must
2312 // be handled using instructions to manipulate the vector registers.
2313 if (Idx != 0)
2314 break;
2315
2316 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2317 [[maybe_unused]] bool IsSubVecPartReg =
2318 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2319 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2320 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2321 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2322 V.isUndef()) &&
2323 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2324 "the subvector is smaller than a full-sized register");
2325
2326 // If we haven't set a SubRegIdx, then we must be going between
2327 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2328 if (SubRegIdx == RISCV::NoSubRegister) {
2329 unsigned InRegClassID =
2330          RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2331      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2332                 InRegClassID &&
2333 "Unexpected subvector extraction");
2334 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2335 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2336 DL, VT, SubV, RC);
2337 ReplaceNode(Node, NewNode);
2338 return;
2339 }
2340
2341 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2342 ReplaceNode(Node, Insert.getNode());
2343 return;
2344 }
2345  case ISD::EXTRACT_SUBVECTOR:
2346  case RISCVISD::TUPLE_EXTRACT: {
2347    SDValue V = Node->getOperand(0);
2348 auto Idx = Node->getConstantOperandVal(1);
2349 MVT InVT = V.getSimpleValueType();
2350 SDLoc DL(V);
2351
2352 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2353 MVT SubVecContainerVT = VT;
2354 // Establish the correct scalable-vector types for any fixed-length type.
2355 if (VT.isFixedLengthVector()) {
2356 assert(Idx == 0);
2357 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2358 }
2359 if (InVT.isFixedLengthVector())
2360 InVT = TLI.getContainerForFixedLengthVector(InVT);
2361
2362 const auto *TRI = Subtarget->getRegisterInfo();
2363 unsigned SubRegIdx;
2364 std::tie(SubRegIdx, Idx) =
2365        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2366            InVT, SubVecContainerVT, Idx, TRI);
2367
2368 // If the Idx hasn't been completely eliminated then this is a subvector
2369 // extract which doesn't naturally align to a vector register. These must
2370 // be handled using instructions to manipulate the vector registers.
2371 if (Idx != 0)
2372 break;
2373
2374 // If we haven't set a SubRegIdx, then we must be going between
2375 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2376 if (SubRegIdx == RISCV::NoSubRegister) {
2377 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2378      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2379                 InRegClassID &&
2380 "Unexpected subvector extraction");
2381 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2382 SDNode *NewNode =
2383 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2384 ReplaceNode(Node, NewNode);
2385 return;
2386 }
2387
2388 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2389 ReplaceNode(Node, Extract.getNode());
2390 return;
2391 }
2392  case RISCVISD::VMV_S_X_VL:
2393  case RISCVISD::VFMV_S_F_VL:
2394  case RISCVISD::VMV_V_X_VL:
2395  case RISCVISD::VFMV_V_F_VL: {
2396 // Try to match splat of a scalar load to a strided load with stride of x0.
2397 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2398 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2399 if (!Node->getOperand(0).isUndef())
2400 break;
2401 SDValue Src = Node->getOperand(1);
2402 auto *Ld = dyn_cast<LoadSDNode>(Src);
2403    // We can't fold a load with an update (indexed load) because its second
2404    // output is used, so the load node can't be removed.
2405 if (!Ld || Ld->isIndexed())
2406 break;
2407 EVT MemVT = Ld->getMemoryVT();
2408 // The memory VT should be the same size as the element type.
2409 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2410 break;
2411 if (!IsProfitableToFold(Src, Node, Node) ||
2412 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2413 break;
2414
2415 SDValue VL;
2416 if (IsScalarMove) {
2417 // We could deal with more VL if we update the VSETVLI insert pass to
2418 // avoid introducing more VSETVLI.
2419 if (!isOneConstant(Node->getOperand(2)))
2420 break;
2421 selectVLOp(Node->getOperand(2), VL);
2422 } else
2423 selectVLOp(Node->getOperand(2), VL);
2424
2425 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2426 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2427
2428 // If VL=1, then we don't need to do a strided load and can just do a
2429 // regular load.
2430 bool IsStrided = !isOneConstant(VL);
2431
2432    // Only do a strided load if the subtarget has an optimized zero-stride
2432    // vector load.
2433 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2434 break;
2435
2436    SmallVector<SDValue> Operands = {
2437        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2438 Ld->getBasePtr()};
2439 if (IsStrided)
2440 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2442 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2443 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2444
2445    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2446    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2447 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2448 Log2SEW, static_cast<unsigned>(LMUL));
2449 MachineSDNode *Load =
2450 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2451 // Update the chain.
2452 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2453 // Record the mem-refs
2454 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2455 // Replace the splat with the vlse.
2456 ReplaceNode(Node, Load);
2457 return;
2458 }
2459 case ISD::PREFETCH:
2460 unsigned Locality = Node->getConstantOperandVal(3);
2461 if (Locality > 2)
2462 break;
2463
2464 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2465 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2467
2468 int NontemporalLevel = 0;
2469 switch (Locality) {
2470 case 0:
2471 NontemporalLevel = 3; // NTL.ALL
2472 break;
2473 case 1:
2474 NontemporalLevel = 1; // NTL.PALL
2475 break;
2476 case 2:
2477 NontemporalLevel = 0; // NTL.P1
2478 break;
2479 default:
2480 llvm_unreachable("unexpected locality value.");
2481 }
2482
2483      if (NontemporalLevel & 0b1)
2484        MMO->setFlags(MONontemporalBit0);
2485      if (NontemporalLevel & 0b10)
2486        MMO->setFlags(MONontemporalBit1);
2487    }
2488 break;
2489 }
2490
2491 // Select the default instruction.
2492 SelectCode(Node);
2493}
2494
2495bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2496    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2497 std::vector<SDValue> &OutOps) {
2498 // Always produce a register and immediate operand, as expected by
2499 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2500 switch (ConstraintID) {
2501  case InlineAsm::ConstraintCode::o:
2502  case InlineAsm::ConstraintCode::m: {
2503    SDValue Op0, Op1;
2504 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2505 assert(Found && "SelectAddrRegImm should always succeed");
2506 OutOps.push_back(Op0);
2507 OutOps.push_back(Op1);
2508 return false;
2509 }
2510  case InlineAsm::ConstraintCode::A:
2511    OutOps.push_back(Op);
2512 OutOps.push_back(
2513 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2514 return false;
2515 default:
2516 report_fatal_error("Unexpected asm memory constraint " +
2517 InlineAsm::getMemConstraintName(ConstraintID));
2518 }
2519
2520 return true;
2521}
2522
2523bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2524                                             SDValue &Offset) {
2525 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2526 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2527 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2528 return true;
2529 }
2530
2531 return false;
2532}
2533
2534// Fold constant addresses.
2535static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2536 const MVT VT, const RISCVSubtarget *Subtarget,
2537                               SDValue Addr, SDValue &Base, SDValue &Offset,
2538                               bool IsPrefetch = false,
2539 bool IsRV32Zdinx = false) {
2540 if (!isa<ConstantSDNode>(Addr))
2541 return false;
2542
2543 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2544
2545 // If the constant is a simm12, we can fold the whole constant and use X0 as
2546 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2547 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
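  // For example, CVal = 0x12345678 splits into Lo12 = 0x678 and
  // Hi = 0x12345000, which materializes as LUI 0x12345 plus a 0x678 offset.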
2548 int64_t Lo12 = SignExtend64<12>(CVal);
2549 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2550 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2551 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2552 return false;
2553 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2554 return false;
2555
2556 if (Hi) {
2557 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2558 Base = SDValue(
2559 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2560 CurDAG->getTargetConstant(Hi20, DL, VT)),
2561 0);
2562 } else {
2563 Base = CurDAG->getRegister(RISCV::X0, VT);
2564 }
2565 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2566 return true;
2567 }
2568
2569 // Ask how constant materialization would handle this constant.
2570 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2571
2572 // If the last instruction would be an ADDI, we can fold its immediate and
2573 // emit the rest of the sequence as the base.
2574 if (Seq.back().getOpcode() != RISCV::ADDI)
2575 return false;
2576 Lo12 = Seq.back().getImm();
2577 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2578 return false;
2579 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2580 return false;
2581
2582 // Drop the last instruction.
2583 Seq.pop_back();
2584 assert(!Seq.empty() && "Expected more instructions in sequence");
2585
2586 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2587 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2588 return true;
2589}
2590
2591// Is this ADD instruction only used as the base pointer of scalar loads and
2592// stores?
2593static bool isWorthFoldingAdd(SDValue Add) {
2594  for (auto *User : Add->users()) {
2595 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2596 User->getOpcode() != ISD::ATOMIC_LOAD &&
2597 User->getOpcode() != ISD::ATOMIC_STORE)
2598 return false;
2599 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
2600 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2601 VT != MVT::f64)
2602 return false;
2603 // Don't allow stores of the value. It must be used as the address.
2604 if (User->getOpcode() == ISD::STORE &&
2605 cast<StoreSDNode>(User)->getValue() == Add)
2606 return false;
2607 if (User->getOpcode() == ISD::ATOMIC_STORE &&
2608 cast<AtomicSDNode>(User)->getVal() == Add)
2609 return false;
2610 }
2611
2612 return true;
2613}
2614
2615bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2616                                              unsigned MaxShiftAmount,
2617 SDValue &Base, SDValue &Index,
2618 SDValue &Scale) {
2619 EVT VT = Addr.getSimpleValueType();
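  // UnwrapShl peels a constant left shift off N: Index receives the
  // pre-shift value and Shift the shift amount (the scale). It returns true
  // only if a usable non-zero shift amount was found.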
2620 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2621 SDValue &Shift) {
2622 uint64_t ShiftAmt = 0;
2623 Index = N;
2624
2625 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2626 // Only match shifts by a value in range [0, MaxShiftAmount].
2627 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2628 Index = N.getOperand(0);
2629 ShiftAmt = N.getConstantOperandVal(1);
2630 }
2631 }
2632
2633 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2634 return ShiftAmt != 0;
2635 };
2636
2637 if (Addr.getOpcode() == ISD::ADD) {
2638 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2639 SDValue AddrB = Addr.getOperand(0);
2640 if (AddrB.getOpcode() == ISD::ADD &&
2641 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2642 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2643 isInt<12>(C1->getSExtValue())) {
2644 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2645 SDValue C1Val =
2646 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2647 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2648 AddrB.getOperand(1), C1Val),
2649 0);
2650 return true;
2651 }
2652 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2653 Base = Addr.getOperand(1);
2654 return true;
2655 } else {
2656 UnwrapShl(Addr.getOperand(1), Index, Scale);
2657 Base = Addr.getOperand(0);
2658 return true;
2659 }
2660 } else if (UnwrapShl(Addr, Index, Scale)) {
2661 EVT VT = Addr.getValueType();
2662 Base = CurDAG->getRegister(RISCV::X0, VT);
2663 return true;
2664 }
2665
2666 return false;
2667}
2668
2669bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2670                                         SDValue &Offset, bool IsRV32Zdinx) {
2671  if (SelectAddrFrameIndex(Addr, Base, Offset))
2672    return true;
2673
2674 SDLoc DL(Addr);
2675 MVT VT = Addr.getSimpleValueType();
2676
2677 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2678 // If this is non RV32Zdinx we can always fold.
2679 if (!IsRV32Zdinx) {
2680 Base = Addr.getOperand(0);
2681 Offset = Addr.getOperand(1);
2682 return true;
2683 }
2684
2685 // For RV32Zdinx we need to have more than 4 byte alignment so we can add 4
2686 // to the offset when we expand in RISCVExpandPseudoInsts.
2687 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
2688 const DataLayout &DL = CurDAG->getDataLayout();
2689 Align Alignment = commonAlignment(
2690 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2691 if (Alignment > 4) {
2692 Base = Addr.getOperand(0);
2693 Offset = Addr.getOperand(1);
2694 return true;
2695 }
2696 }
2697 if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) {
2698 Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
2699 if (Alignment > 4) {
2700 Base = Addr.getOperand(0);
2701 Offset = Addr.getOperand(1);
2702 return true;
2703 }
2704 }
2705 }
2706
2707 int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0;
2708  if (CurDAG->isBaseWithConstantOffset(Addr)) {
2709    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2710 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2711 Base = Addr.getOperand(0);
2712 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2713 SDValue LoOperand = Base.getOperand(1);
2714 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2715 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2716 // (its low part, really), then we can rely on the alignment of that
2717        // variable to provide a margin of safety before the low part can overflow
2718 // the 12 bits of the load/store offset. Check if CVal falls within
2719 // that margin; if so (low part + CVal) can't overflow.
2720 const DataLayout &DL = CurDAG->getDataLayout();
2721 Align Alignment = commonAlignment(
2722 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2723 if ((CVal == 0 || Alignment > CVal) &&
2724 (!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) {
2725 int64_t CombinedOffset = CVal + GA->getOffset();
2726 Base = Base.getOperand(0);
2727          Offset = CurDAG->getTargetGlobalAddress(
2728              GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2729 CombinedOffset, GA->getTargetFlags());
2730 return true;
2731 }
2732 }
2733 }
2734
2735 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2736 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2737      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2738      return true;
2739 }
2740 }
2741
2742 // Handle ADD with large immediates.
2743 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2744 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2745 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2746 "simm12 not already handled?");
2747
2748 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2749 // an ADDI for part of the offset and fold the rest into the load/store.
2750 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
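    // For example, an offset of 3000 becomes ADDI base, 2047 with a
    // remaining load/store offset of 953; both values fit in simm12.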
2751 if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) {
2752 int64_t Adj = CVal < 0 ? -2048 : 2047;
2753 Base = SDValue(
2754 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2755 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
2756 0);
2757 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
2758 return true;
2759 }
2760
2761 // For larger immediates, we might be able to save one instruction from
2762 // constant materialization by folding the Lo12 bits of the immediate into
2763 // the address. We should only do this if the ADD is only used by loads and
2764 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2765 // separately with the full materialized immediate creating extra
2766 // instructions.
2767 if (isWorthFoldingAdd(Addr) &&
2768 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2769 Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) {
2770 // Insert an ADD instruction with the materialized Hi52 bits.
2771 Base = SDValue(
2772 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2773 0);
2774 return true;
2775 }
2776 }
2777
2778 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2779 /*IsPrefetch=*/false, RV32ZdinxRange))
2780 return true;
2781
2782 Base = Addr;
2783 Offset = CurDAG->getTargetConstant(0, DL, VT);
2784 return true;
2785}
2786
2787/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2788/// Offset should be all zeros.
2789bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2790                                                 SDValue &Offset) {
2791  if (SelectAddrFrameIndex(Addr, Base, Offset))
2792    return true;
2793
2794 SDLoc DL(Addr);
2795 MVT VT = Addr.getSimpleValueType();
2796
2797  if (CurDAG->isBaseWithConstantOffset(Addr)) {
2798    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2799 if (isInt<12>(CVal)) {
2800 Base = Addr.getOperand(0);
2801
2802 // Early-out if not a valid offset.
2803 if ((CVal & 0b11111) != 0) {
2804 Base = Addr;
2805 Offset = CurDAG->getTargetConstant(0, DL, VT);
2806 return true;
2807 }
2808
2809 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2810 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2811      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2812      return true;
2813 }
2814 }
2815
2816 // Handle ADD with large immediates.
2817 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2818 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2819    assert(!isInt<12>(CVal) &&
2820           "simm12 not already handled?");
2821
2822 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2823    // one instruction by folding an adjustment (-2048 or 2016) into the address.
2824 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2825 int64_t Adj = CVal < 0 ? -2048 : 2016;
2826 int64_t AdjustedOffset = CVal - Adj;
2827 Base =
2829 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2830 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
2831 0);
2832      Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
2833      return true;
2834 }
2835
2836 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2837 Offset, /*IsPrefetch=*/true)) {
2838 // Insert an ADD instruction with the materialized Hi52 bits.
2839 Base = SDValue(
2840 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2841 0);
2842 return true;
2843 }
2844 }
2845
2846 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2847 /*IsPrefetch=*/true))
2848 return true;
2849
2850 Base = Addr;
2851 Offset = CurDAG->getTargetConstant(0, DL, VT);
2852 return true;
2853}
2854
2855bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2856                                         SDValue &Offset) {
2857 if (Addr.getOpcode() != ISD::ADD)
2858 return false;
2859
2860 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2861 return false;
2862
2863 Base = Addr.getOperand(1);
2864 Offset = Addr.getOperand(0);
2865 return true;
2866}
2867
2868bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2869                                        SDValue &ShAmt) {
2870 ShAmt = N;
2871
2872 // Peek through zext.
2873 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2874 ShAmt = ShAmt.getOperand(0);
2875
2876 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2877 // amount. If there is an AND on the shift amount, we can bypass it if it
2878 // doesn't affect any of those bits.
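  // For example, (srl X, (and Y, 63)) on RV64 can shift directly by Y
  // because SRL only reads bits [5:0] of the shift amount.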
2879 if (ShAmt.getOpcode() == ISD::AND &&
2880 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2881 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2882
2883 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2884 // mask that covers the bits needed to represent all shift amounts.
2885 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2886 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2887
2888 if (ShMask.isSubsetOf(AndMask)) {
2889 ShAmt = ShAmt.getOperand(0);
2890 } else {
2891 // SimplifyDemandedBits may have optimized the mask so try restoring any
2892 // bits that are known zero.
2893 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2894 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2895 return true;
2896 ShAmt = ShAmt.getOperand(0);
2897 }
2898 }
2899
2900 if (ShAmt.getOpcode() == ISD::ADD &&
2901 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2902 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2903 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2904 // to avoid the ADD.
2905 if (Imm != 0 && Imm % ShiftWidth == 0) {
2906 ShAmt = ShAmt.getOperand(0);
2907 return true;
2908 }
2909 } else if (ShAmt.getOpcode() == ISD::SUB &&
2910 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2911 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2912 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2913 // generate a NEG instead of a SUB of a constant.
2914 if (Imm != 0 && Imm % ShiftWidth == 0) {
2915 SDLoc DL(ShAmt);
2916 EVT VT = ShAmt.getValueType();
2917 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2918 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2919 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2920 ShAmt.getOperand(1));
2921 ShAmt = SDValue(Neg, 0);
2922 return true;
2923 }
2924 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2925 // to generate a NOT instead of a SUB of a constant.
2926 if (Imm % ShiftWidth == ShiftWidth - 1) {
2927 SDLoc DL(ShAmt);
2928 EVT VT = ShAmt.getValueType();
2929      MachineSDNode *Not = CurDAG->getMachineNode(
2930          RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2931 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
2932 ShAmt = SDValue(Not, 0);
2933 return true;
2934 }
2935 }
2936
2937 return true;
2938}
2939
2940/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2941/// check for equality with 0. This function emits instructions that convert the
2942/// seteq/setne into something that can be compared with 0.
2943/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2944/// ISD::SETNE).
2945bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2946                                    SDValue &Val) {
2947 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2948 "Unexpected condition code!");
2949
2950 // We're looking for a setcc.
2951 if (N->getOpcode() != ISD::SETCC)
2952 return false;
2953
2954 // Must be an equality comparison.
2955 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2956 if (CCVal != ExpectedCCVal)
2957 return false;
2958
2959 SDValue LHS = N->getOperand(0);
2960 SDValue RHS = N->getOperand(1);
2961
2962 if (!LHS.getValueType().isScalarInteger())
2963 return false;
2964
2966  // If the RHS is 0, we don't need any extra instructions; just return the LHS.
2966 if (isNullConstant(RHS)) {
2967 Val = LHS;
2968 return true;
2969 }
2970
2971 SDLoc DL(N);
2972
2973 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2974 int64_t CVal = C->getSExtValue();
2975 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2976 // non-zero otherwise.
2977 if (CVal == -2048) {
2978 Val = SDValue(
2979          CurDAG->getMachineNode(
2980              RISCV::XORI, DL, N->getValueType(0), LHS,
2981 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
2982 0);
2983 return true;
2984 }
2985    // If the RHS is in [-2047, 2048], we can use addi with -RHS to produce 0 if the
2986 // LHS is equal to the RHS and non-zero otherwise.
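    // For example, (seteq X, 100) becomes (addi X, -100), which is zero
    // exactly when X == 100.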
2987 if (isInt<12>(CVal) || CVal == 2048) {
2988 Val = SDValue(
2989          CurDAG->getMachineNode(
2990              RISCV::ADDI, DL, N->getValueType(0), LHS,
2991 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
2992 0);
2993 return true;
2994 }
2995 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
2996 Val = SDValue(
2997          CurDAG->getMachineNode(
2998              RISCV::BINVI, DL, N->getValueType(0), LHS,
2999 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3000 0);
3001 return true;
3002 }
3003 }
3004
3005 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3006 // equal and a non-zero value if they aren't.
3007 Val = SDValue(
3008 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3009 return true;
3010}
3011
3012bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3013  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3014 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3015 Val = N.getOperand(0);
3016 return true;
3017 }
3018
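  // A matching (sra (shl X, ShiftAmt), ShiftAmt) pair is just an in-register
  // sign extension from (bit width - ShiftAmt) bits, so both shifts can be
  // stripped.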
3019 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3020 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3021 return N;
3022
3023 SDValue N0 = N.getOperand(0);
3024 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3025 N.getConstantOperandVal(1) == ShiftAmt &&
3026 N0.getConstantOperandVal(1) == ShiftAmt)
3027 return N0.getOperand(0);
3028
3029 return N;
3030 };
3031
3032 MVT VT = N.getSimpleValueType();
3033 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3034 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3035 return true;
3036 }
3037
3038 return false;
3039}
3040
3041bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3042  if (N.getOpcode() == ISD::AND) {
3043 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3044 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3045 Val = N.getOperand(0);
3046 return true;
3047 }
3048 }
3049 MVT VT = N.getSimpleValueType();
3050 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3051 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3052 Val = N;
3053 return true;
3054 }
3055
3056 return false;
3057}
3058
3059/// Look for various patterns that can be done with a SHL that can be folded
3060/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3061/// SHXADD we are trying to match.
3062bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3063                                       SDValue &Val) {
3064 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3065 SDValue N0 = N.getOperand(0);
3066
3067 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3068 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3069 isa<ConstantSDNode>(N0.getOperand(1))) {
3070 uint64_t Mask = N.getConstantOperandVal(1);
3071 unsigned C2 = N0.getConstantOperandVal(1);
3072
3073 unsigned XLen = Subtarget->getXLen();
3074 if (LeftShift)
3075 Mask &= maskTrailingZeros<uint64_t>(C2);
3076 else
3077 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3078
3079      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3080      // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3081      // followed by a SHXADD with c3 for the X amount.
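      // For example, with ShAmt == 3: (and (shl y, 1), 0xFFFFFFFFFFFFFFF8)
      // becomes (srli y, 2) feeding a SH3ADD.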
3082 if (isShiftedMask_64(Mask)) {
3083 unsigned Leading = XLen - llvm::bit_width(Mask);
3084 unsigned Trailing = llvm::countr_zero(Mask);
3085 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
3086 SDLoc DL(N);
3087 EVT VT = N.getValueType();
3088          Val = SDValue(CurDAG->getMachineNode(
3089                            RISCV::SRLI, DL, VT, N0.getOperand(0),
3090 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
3091 0);
3092 return true;
3093 }
3094        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3095        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3096        // followed by a SHXADD using c3 for the X amount.
3097 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
3098 SDLoc DL(N);
3099 EVT VT = N.getValueType();
3100 Val = SDValue(
3101              CurDAG->getMachineNode(
3102                  RISCV::SRLI, DL, VT, N0.getOperand(0),
3103 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
3104 0);
3105 return true;
3106 }
3107 }
3108 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3109 isa<ConstantSDNode>(N0.getOperand(1))) {
3110 uint64_t Mask = N.getConstantOperandVal(1);
3111 unsigned C2 = N0.getConstantOperandVal(1);
3112
3113 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3114 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3115 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3116 // the X amount.
3117 if (isShiftedMask_64(Mask)) {
3118 unsigned XLen = Subtarget->getXLen();
3119 unsigned Leading = XLen - llvm::bit_width(Mask);
3120 unsigned Trailing = llvm::countr_zero(Mask);
3121 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3122 SDLoc DL(N);
3123 EVT VT = N.getValueType();
3124          Val = SDValue(CurDAG->getMachineNode(
3125                            RISCV::SRAI, DL, VT, N0.getOperand(0),
3126 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3127 0);
3128          Val = SDValue(CurDAG->getMachineNode(
3129                            RISCV::SRLI, DL, VT, Val,
3130 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3131 0);
3132 return true;
3133 }
3134 }
3135 }
3136 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3137 (LeftShift || N.getOpcode() == ISD::SRL) &&
3138 isa<ConstantSDNode>(N.getOperand(1))) {
3139 SDValue N0 = N.getOperand(0);
3140 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3141 isa<ConstantSDNode>(N0.getOperand(1))) {
3142 uint64_t Mask = N0.getConstantOperandVal(1);
3143 if (isShiftedMask_64(Mask)) {
3144 unsigned C1 = N.getConstantOperandVal(1);
3145 unsigned XLen = Subtarget->getXLen();
3146 unsigned Leading = XLen - llvm::bit_width(Mask);
3147 unsigned Trailing = llvm::countr_zero(Mask);
3148 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3149 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3150 if (LeftShift && Leading == 32 && Trailing > 0 &&
3151 (Trailing + C1) == ShAmt) {
3152 SDLoc DL(N);
3153 EVT VT = N.getValueType();
3154          Val = SDValue(CurDAG->getMachineNode(
3155                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
3156 CurDAG->getTargetConstant(Trailing, DL, VT)),
3157 0);
3158 return true;
3159 }
3160 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3161 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3162 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3163 (Trailing - C1) == ShAmt) {
3164 SDLoc DL(N);
3165 EVT VT = N.getValueType();
3166          Val = SDValue(CurDAG->getMachineNode(
3167                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
3168 CurDAG->getTargetConstant(Trailing, DL, VT)),
3169 0);
3170 return true;
3171 }
3172 }
3173 }
3174 }
3175
3176 return false;
3177}
3178
3179/// Look for various patterns that can be done with a SHL that can be folded
3180/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3181/// SHXADD_UW we are trying to match.
3182bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3183                                          SDValue &Val) {
3184 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3185 N.hasOneUse()) {
3186 SDValue N0 = N.getOperand(0);
3187 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3188 N0.hasOneUse()) {
3189 uint64_t Mask = N.getConstantOperandVal(1);
3190 unsigned C2 = N0.getConstantOperandVal(1);
3191
3192 Mask &= maskTrailingZeros<uint64_t>(C2);
3193
3194 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3195 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3196 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3197 if (isShiftedMask_64(Mask)) {
3198 unsigned Leading = llvm::countl_zero(Mask);
3199 unsigned Trailing = llvm::countr_zero(Mask);
3200 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3201 SDLoc DL(N);
3202 EVT VT = N.getValueType();
3203          Val = SDValue(CurDAG->getMachineNode(
3204                            RISCV::SLLI, DL, VT, N0.getOperand(0),
3205 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3206 0);
3207 return true;
3208 }
3209 }
3210 }
3211 }
3212
3213 return false;
3214}
3215
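// Inverting is only worthwhile when the low 12 bits of the constant are all
// ones (so the inverted value needs no trailing ADDI) and every user is a
// bitwise-logic op.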
3216bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3217  if (!isa<ConstantSDNode>(N))
3218 return false;
3219
3220 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3221 if ((Imm & 0xfff) != 0xfff || Imm == -1)
3222 return false;
3223
3224 for (const SDNode *U : N->users()) {
3225 if (!ISD::isBitwiseLogicOp(U->getOpcode()))
3226 return false;
3227 }
3228
3229 // For 32-bit signed constants we already know it's a win: LUI+ADDI vs LUI.
3230 // For 64-bit constants, the instruction sequences get complex,
3231 // so we select inverted only if it's cheaper.
3232 if (!isInt<32>(Imm)) {
3233 int OrigImmCost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3234 /*CompressionCost=*/true);
3235 int NegImmCost = RISCVMatInt::getIntMatCost(APInt(64, ~Imm), 64, *Subtarget,
3236 /*CompressionCost=*/true);
3237 if (OrigImmCost <= NegImmCost)
3238 return false;
3239 }
3240
3241 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3242 return true;
3243}
3244
3245static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3246 unsigned Bits,
3247 const TargetInstrInfo *TII) {
3248 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3249
3250 if (!MCOpcode)
3251 return false;
3252
3253 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3254 const uint64_t TSFlags = MCID.TSFlags;
3255 if (!RISCVII::hasSEWOp(TSFlags))
3256 return false;
3257 assert(RISCVII::hasVLOp(TSFlags));
3258
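  // Operands end with ..., VL, SEW, and then the optional policy, chain and
  // glue operands, so locate VL by stepping back over those trailing slots.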
3259 bool HasGlueOp = User->getGluedNode() != nullptr;
3260 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3261 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3262 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3263 unsigned VLIdx =
3264 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3265 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3266
3267 if (UserOpNo == VLIdx)
3268 return false;
3269
3270 auto NumDemandedBits =
3271 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3272 return NumDemandedBits && Bits >= *NumDemandedBits;
3273}
3274
3275// Return true if all users of this SDNode* only consume the lower \p Bits.
3276// This can be used to form W instructions for add/sub/mul/shl even when the
3277// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3278// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3279// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3280// the add/sub/mul/shl to become non-W instructions. By checking the users we
3281// may be able to use a W instruction and CSE with the other instruction if
3282// this has happened. We could try to detect that the CSE opportunity exists
3283// before doing this, but that would be more complicated.
3284bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3285                                        const unsigned Depth) const {
3286 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3287 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3288 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3289 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3290 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3291 isa<ConstantSDNode>(Node) || Depth != 0) &&
3292 "Unexpected opcode");
3293
3294  if (Depth >= SelectionDAG::MaxRecursionDepth)
3295    return false;
3296
3297 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3298 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3299 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3300 return false;
3301
3302 for (SDUse &Use : Node->uses()) {
3303 SDNode *User = Use.getUser();
3304 // Users of this node should have already been instruction selected
3305 if (!User->isMachineOpcode())
3306 return false;
3307
3308 // TODO: Add more opcodes?
3309 switch (User->getMachineOpcode()) {
3310 default:
3311 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
3312 break;
3313 return false;
3314 case RISCV::ADDW:
3315 case RISCV::ADDIW:
3316 case RISCV::SUBW:
3317 case RISCV::MULW:
3318 case RISCV::SLLW:
3319 case RISCV::SLLIW:
3320 case RISCV::SRAW:
3321 case RISCV::SRAIW:
3322 case RISCV::SRLW:
3323 case RISCV::SRLIW:
3324 case RISCV::DIVW:
3325 case RISCV::DIVUW:
3326 case RISCV::REMW:
3327 case RISCV::REMUW:
3328 case RISCV::ROLW:
3329 case RISCV::RORW:
3330 case RISCV::RORIW:
3331 case RISCV::CLZW:
3332 case RISCV::CTZW:
3333 case RISCV::CPOPW:
3334 case RISCV::SLLI_UW:
3335 case RISCV::FMV_W_X:
3336 case RISCV::FCVT_H_W:
3337 case RISCV::FCVT_H_W_INX:
3338 case RISCV::FCVT_H_WU:
3339 case RISCV::FCVT_H_WU_INX:
3340 case RISCV::FCVT_S_W:
3341 case RISCV::FCVT_S_W_INX:
3342 case RISCV::FCVT_S_WU:
3343 case RISCV::FCVT_S_WU_INX:
3344 case RISCV::FCVT_D_W:
3345 case RISCV::FCVT_D_W_INX:
3346 case RISCV::FCVT_D_WU:
3347 case RISCV::FCVT_D_WU_INX:
3348 case RISCV::TH_REVW:
3349 case RISCV::TH_SRRIW:
3350 if (Bits >= 32)
3351 break;
3352 return false;
3353 case RISCV::SLL:
3354 case RISCV::SRA:
3355 case RISCV::SRL:
3356 case RISCV::ROL:
3357 case RISCV::ROR:
3358 case RISCV::BSET:
3359 case RISCV::BCLR:
3360 case RISCV::BINV:
3361 // Shift amount operands only use log2(Xlen) bits.
3362 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3363 break;
3364 return false;
3365 case RISCV::SLLI:
3366 // SLLI only uses the lower (XLen - ShAmt) bits.
3367 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3368 break;
3369 return false;
3370 case RISCV::ANDI:
3371 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3372 break;
3373 goto RecCheck;
3374 case RISCV::ORI: {
3375 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3376 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3377 break;
3378 [[fallthrough]];
3379 }
3380 case RISCV::AND:
3381 case RISCV::OR:
3382 case RISCV::XOR:
3383 case RISCV::XORI:
3384 case RISCV::ANDN:
3385 case RISCV::ORN:
3386 case RISCV::XNOR:
3387 case RISCV::SH1ADD:
3388 case RISCV::SH2ADD:
3389 case RISCV::SH3ADD:
3390 RecCheck:
3391 if (hasAllNBitUsers(User, Bits, Depth + 1))
3392 break;
3393 return false;
3394 case RISCV::SRLI: {
3395 unsigned ShAmt = User->getConstantOperandVal(1);
3396 // If we are shifting right by less than Bits, and users don't demand any
3397 // bits that were shifted into [Bits-1:0], then we can consider this as an
3398 // N-Bit user.
3399 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3400 break;
3401 return false;
3402 }
3403 case RISCV::SEXT_B:
3404 case RISCV::PACKH:
3405 if (Bits >= 8)
3406 break;
3407 return false;
3408 case RISCV::SEXT_H:
3409 case RISCV::FMV_H_X:
3410 case RISCV::ZEXT_H_RV32:
3411 case RISCV::ZEXT_H_RV64:
3412 case RISCV::PACKW:
3413 if (Bits >= 16)
3414 break;
3415 return false;
3416 case RISCV::PACK:
3417 if (Bits >= (Subtarget->getXLen() / 2))
3418 break;
3419 return false;
3420 case RISCV::ADD_UW:
3421 case RISCV::SH1ADD_UW:
3422 case RISCV::SH2ADD_UW:
3423 case RISCV::SH3ADD_UW:
3424 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3425 // 32 bits.
3426 if (Use.getOperandNo() == 0 && Bits >= 32)
3427 break;
3428 return false;
3429 case RISCV::SB:
3430 if (Use.getOperandNo() == 0 && Bits >= 8)
3431 break;
3432 return false;
3433 case RISCV::SH:
3434 if (Use.getOperandNo() == 0 && Bits >= 16)
3435 break;
3436 return false;
3437 case RISCV::SW:
3438 if (Use.getOperandNo() == 0 && Bits >= 32)
3439 break;
3440 return false;
3441 }
3442 }
3443
3444 return true;
3445}
3446
3447// Select a constant that can be represented as (sign_extend(imm5) << imm2).
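// Illustrative values: 88 == 11 << 3 is accepted as Simm5 = 11, Shl2 = 3, whereas
// 4096 == 1 << 12 is rejected because the shift amount is capped at 3.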
3448bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3449 SDValue &Shl2) {
3450 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3451 int64_t Offset = C->getSExtValue();
3452 unsigned Shift;
3453 for (Shift = 0; Shift < 4; Shift++)
3454 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3455 break;
3456
3457 // Constant cannot be encoded.
3458 if (Shift == 4)
3459 return false;
3460
3461 EVT Ty = N->getValueType(0);
3462 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3463 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3464 return true;
3465 }
3466
3467 return false;
3468}
3469
3470// Select VL as a 5 bit immediate or a value that will become a register. This
3471 // allows us to choose between VSETIVLI or VSETVLI later.
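// For instance (illustrative): a constant VL of 8 stays a 5-bit immediate and can
// later become a vsetivli, an all-ones VL becomes the VLMax sentinel, and other
// values are left for the register form of vsetvli.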
3472bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3473 auto *C = dyn_cast<ConstantSDNode>(N);
3474 if (C && isUInt<5>(C->getZExtValue())) {
3475 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3476 N->getValueType(0));
3477 } else if (C && C->isAllOnes()) {
3478 // Treat all ones as VLMax.
3479 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3480 N->getValueType(0));
3481 } else if (isa<RegisterSDNode>(N) &&
3482 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3483 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3484 // as the register class. Convert X0 to a special immediate to pass the
3485 // MachineVerifier. This is recognized specially by the vsetvli insertion
3486 // pass.
3487 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3488 N->getValueType(0));
3489 } else {
3490 VL = N;
3491 }
3492
3493 return true;
3494}
3495
3496static SDValue findVSplat(SDValue N) {
3497 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3498 if (!N.getOperand(0).isUndef())
3499 return SDValue();
3500 N = N.getOperand(1);
3501 }
3502 SDValue Splat = N;
3503 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3504 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3505 !Splat.getOperand(0).isUndef())
3506 return SDValue();
3507 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3508 return Splat;
3509}
3510
3511bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3512 SDValue Splat = findVSplat(N);
3513 if (!Splat)
3514 return false;
3515
3516 SplatVal = Splat.getOperand(1);
3517 return true;
3518}
3519
3520static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3521 SelectionDAG &DAG,
3522 const RISCVSubtarget &Subtarget,
3523 std::function<bool(int64_t)> ValidateImm) {
3524 SDValue Splat = findVSplat(N);
3525 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3526 return false;
3527
3528 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3529 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3530 "Unexpected splat operand type");
3531
3532 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3533 // type is wider than the resulting vector element type: an implicit
3534 // truncation first takes place. Therefore, perform a manual
3535 // truncation/sign-extension in order to ignore any truncated bits and catch
3536 // any zero-extended immediate.
3537 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3538 // sign-extending to (XLenVT -1).
3539 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3540
3541 int64_t SplatImm = SplatConst.getSExtValue();
3542
3543 if (!ValidateImm(SplatImm))
3544 return false;
3545
3546 SplatVal =
3547 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3548 return true;
3549}
3550
3551bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3552 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3553 [](int64_t Imm) { return isInt<5>(Imm); });
3554}
3555
3556bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3557 return selectVSplatImmHelper(
3558 N, SplatVal, *CurDAG, *Subtarget,
3559 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3560}
3561
3562bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3563 SDValue &SplatVal) {
3564 return selectVSplatImmHelper(
3565 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3566 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3567 });
3568}
3569
3570bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3571 SDValue &SplatVal) {
3572 return selectVSplatImmHelper(
3573 N, SplatVal, *CurDAG, *Subtarget,
3574 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3575}
3576
3577bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3578 auto IsExtOrTrunc = [](SDValue N) {
3579 switch (N->getOpcode()) {
3580 case ISD::SIGN_EXTEND:
3581 case ISD::ZERO_EXTEND:
3582 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3583 // inactive elements will be undef.
3584 case RISCVISD::TRUNCATE_VECTOR_VL:
3585 case RISCVISD::VSEXT_VL:
3586 case RISCVISD::VZEXT_VL:
3587 return true;
3588 default:
3589 return false;
3590 }
3591 };
3592
3593 // We can have multiple nested nodes, so unravel them all if needed.
3594 while (IsExtOrTrunc(N)) {
3595 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3596 return false;
3597 N = N->getOperand(0);
3598 }
3599
3600 return selectVSplat(N, SplatVal);
3601}
3602
3603bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
3604 // Allow bitcasts from XLenVT -> FP.
3605 if (N.getOpcode() == ISD::BITCAST &&
3606 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
3607 Imm = N.getOperand(0);
3608 return true;
3609 }
3610 // Allow moves from XLenVT to FP.
3611 if (N.getOpcode() == RISCVISD::FMV_H_X ||
3612 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
3613 Imm = N.getOperand(0);
3614 return true;
3615 }
3616
3617 // Otherwise, look for FP constants that can be materialized with a scalar int.
3618 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3619 if (!CFP)
3620 return false;
3621 const APFloat &APF = CFP->getValueAPF();
3622 // td can handle +0.0 already.
3623 if (APF.isPosZero())
3624 return false;
3625
3626 MVT VT = CFP->getSimpleValueType(0);
3627
3628 MVT XLenVT = Subtarget->getXLenVT();
3629 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3630 assert(APF.isNegZero() && "Unexpected constant.");
3631 return false;
3632 }
3633 SDLoc DL(N);
3634 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3635 *Subtarget);
3636 return true;
3637}
3638
3639bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3640 SDValue &Imm) {
3641 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3642 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3643
3644 if (!isInt<5>(ImmVal))
3645 return false;
3646
3647 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
3648 Subtarget->getXLenVT());
3649 return true;
3650 }
3651
3652 return false;
3653}
3654
3655// Try to remove sext.w if the input is a W instruction or can be made into
3656// a W instruction cheaply.
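// Illustrative example: `addw a0, a1, a2; addiw a0, a0, 0` already produces a
// sign-extended result, so the trailing sext.w (the addiw with immediate 0) is
// simply removed; a plain `add` feeding only the sext.w is instead rewritten to
// `addw`.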
3657bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3658 // Look for the sext.w pattern, addiw rd, rs1, 0.
3659 if (N->getMachineOpcode() != RISCV::ADDIW ||
3660 !isNullConstant(N->getOperand(1)))
3661 return false;
3662
3663 SDValue N0 = N->getOperand(0);
3664 if (!N0.isMachineOpcode())
3665 return false;
3666
3667 switch (N0.getMachineOpcode()) {
3668 default:
3669 break;
3670 case RISCV::ADD:
3671 case RISCV::ADDI:
3672 case RISCV::SUB:
3673 case RISCV::MUL:
3674 case RISCV::SLLI: {
3675 // Convert sext.w+add/sub/mul to their W instructions. This will create
3676 // a new independent instruction. This improves latency.
3677 unsigned Opc;
3678 switch (N0.getMachineOpcode()) {
3679 default:
3680 llvm_unreachable("Unexpected opcode!");
3681 case RISCV::ADD: Opc = RISCV::ADDW; break;
3682 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3683 case RISCV::SUB: Opc = RISCV::SUBW; break;
3684 case RISCV::MUL: Opc = RISCV::MULW; break;
3685 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3686 }
3687
3688 SDValue N00 = N0.getOperand(0);
3689 SDValue N01 = N0.getOperand(1);
3690
3691 // Shift amount needs to be uimm5.
3692 if (N0.getMachineOpcode() == RISCV::SLLI &&
3693 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3694 break;
3695
3696 SDNode *Result =
3697 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3698 N00, N01);
3699 ReplaceUses(N, Result);
3700 return true;
3701 }
3702 case RISCV::ADDW:
3703 case RISCV::ADDIW:
3704 case RISCV::SUBW:
3705 case RISCV::MULW:
3706 case RISCV::SLLIW:
3707 case RISCV::PACKW:
3708 case RISCV::TH_MULAW:
3709 case RISCV::TH_MULAH:
3710 case RISCV::TH_MULSW:
3711 case RISCV::TH_MULSH:
3712 if (N0.getValueType() == MVT::i32)
3713 break;
3714
3715 // Result is already sign extended; just remove the sext.w.
3716 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3717 ReplaceUses(N, N0.getNode());
3718 return true;
3719 }
3720
3721 return false;
3722}
3723
3724// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3725// that's glued to the pseudo. This tries to look up the value that was copied
3726// to V0.
3727static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3728 // Check that we're using V0 as a mask register.
3729 if (!isa<RegisterSDNode>(MaskOp) ||
3730 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3731 return SDValue();
3732
3733 // The glued user defines V0.
3734 const auto *Glued = GlueOp.getNode();
3735
3736 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3737 return SDValue();
3738
3739 // Check that we're defining V0 as a mask register.
3740 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3741 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3742 return SDValue();
3743
3744 SDValue MaskSetter = Glued->getOperand(2);
3745
3746 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3747 // from an extract_subvector or insert_subvector.
3748 if (MaskSetter->isMachineOpcode() &&
3749 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3750 MaskSetter = MaskSetter->getOperand(0);
3751
3752 return MaskSetter;
3753}
3754
3755static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3756 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3757 SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3758 if (!MaskSetter)
3759 return false;
3760
3761 const auto IsVMSet = [](unsigned Opc) {
3762 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3763 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3764 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3765 Opc == RISCV::PseudoVMSET_M_B8;
3766 };
3767
3768 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3769 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3770 // assume that it's all-ones? Same applies to its VL.
3771 return MaskSetter->isMachineOpcode() &&
3772 IsVMSet(MaskSetter.getMachineOpcode());
3773}
3774
3775// Return true if we can prove that the mask of N is an all-ones mask.
3776static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3777 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3778 N->getOperand(N->getNumOperands() - 1));
3779}
3780
3781static bool isImplicitDef(SDValue V) {
3782 if (!V.isMachineOpcode())
3783 return false;
3784 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3785 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3786 if (!isImplicitDef(V.getOperand(I)))
3787 return false;
3788 return true;
3789 }
3790 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3791}
3792
3793static bool hasGPROut(unsigned Opc) {
3794 switch (RISCV::getRVVMCOpcode(Opc)) {
3795 case RISCV::VCPOP_M:
3796 case RISCV::VFIRST_M:
3797 return true;
3798 }
3799 return false;
3800}
3801
3802// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3803// corresponding "unmasked" pseudo versions. The mask we're interested in will
3804// take the form of a V0 physical register operand, with a glued
3805// register-setting instruction.
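// Illustrative example (pseudo names chosen for exposition): a
// PseudoVADD_VV_M1_MASK whose V0 operand is defined by a PseudoVMSET_M_* is
// rewritten to the unmasked PseudoVADD_VV_M1, dropping the mask operand (and the
// passthru operand when the unmasked form carries no policy operand or GPR result).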
3806bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3807 const RISCV::RISCVMaskedPseudoInfo *I =
3808 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3809 if (!I)
3810 return false;
3811
3812 unsigned MaskOpIdx = I->MaskOpIdx;
3813 if (!usesAllOnesMask(N, MaskOpIdx))
3814 return false;
3815
3816 // There are two classes of pseudos in the table - compares and
3817 // everything else. See the comment on RISCVMaskedPseudo for details.
3818 const unsigned Opc = I->UnmaskedPseudo;
3819 const MCInstrDesc &MCID = TII->get(Opc);
3820 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3821#ifndef NDEBUG
3822 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3825 "Masked and unmasked pseudos are inconsistent");
3826 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3827 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3828#endif
3829
3830 SmallVector<SDValue, 8> Ops;
3831 // Skip the passthru operand at index 0 if !UseTUPseudo and no GPR out.
3832 bool ShouldSkip = !UseTUPseudo && !hasGPROut(Opc);
3833 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
3834 // Skip the mask, and the Glue.
3835 SDValue Op = N->getOperand(I);
3836 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3837 continue;
3838 Ops.push_back(Op);
3839 }
3840
3841 // Transitively apply any node glued to our new node.
3842 const auto *Glued = N->getGluedNode();
3843 if (auto *TGlued = Glued->getGluedNode())
3844 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3845
3846 MachineSDNode *Result =
3847 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3848
3849 if (!N->memoperands_empty())
3850 CurDAG->setNodeMemRefs(Result, N->memoperands());
3851
3852 Result->setFlags(N->getFlags());
3853 ReplaceUses(N, Result);
3854
3855 return true;
3856}
3857
3858static bool IsVMerge(SDNode *N) {
3859 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3860}
3861
3862// Try to fold away VMERGE_VVM instructions into their true operands:
3863//
3864// %true = PseudoVADD_VV ...
3865// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3866// ->
3867// %x = PseudoVADD_VV_MASK %false, ..., %mask
3868//
3869// We can only fold if vmerge's passthru operand, vmerge's false operand and
3870// %true's passthru operand (if it has one) are the same. This is because we
3871// have to consolidate them into one passthru operand in the result.
3872//
3873// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3874// mask is all ones.
3875//
3876// The resulting VL is the minimum of the two VLs.
3877//
3878// The resulting policy is the effective policy the vmerge would have had,
3879// i.e. whether or not its passthru operand was implicit-def.
3880bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3881 SDValue Passthru, False, True, VL, Mask, Glue;
3882 assert(IsVMerge(N));
3883 Passthru = N->getOperand(0);
3884 False = N->getOperand(1);
3885 True = N->getOperand(2);
3886 Mask = N->getOperand(3);
3887 VL = N->getOperand(4);
3888 // We always have a glue node for the mask at v0.
3889 Glue = N->getOperand(N->getNumOperands() - 1);
3890 assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3891 assert(Glue.getValueType() == MVT::Glue);
3892
3893 // If the EEW of True is different from vmerge's SEW, then we can't fold.
3894 if (True.getSimpleValueType() != N->getSimpleValueType(0))
3895 return false;
3896
3897 // We require that either passthru and false are the same, or that passthru
3898 // is undefined.
3899 if (Passthru != False && !isImplicitDef(Passthru))
3900 return false;
3901
3902 assert(True.getResNo() == 0 &&
3903 "Expect True is the first output of an instruction.");
3904
3905 // N must be the only user of True.
3906 if (!True.hasOneUse())
3907 return false;
3908
3909 if (!True.isMachineOpcode())
3910 return false;
3911
3912 unsigned TrueOpc = True.getMachineOpcode();
3913 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3914 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3915 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3916
3917 const RISCV::RISCVMaskedPseudoInfo *Info =
3918 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3919 if (!Info)
3920 return false;
3921
3922 // If True has a passthru operand then it needs to be the same as vmerge's
3923 // False, since False will be used for the result's passthru operand.
3924 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3925 SDValue PassthruOpTrue = True->getOperand(0);
3926 if (False != PassthruOpTrue)
3927 return false;
3928 }
3929
3930 // Skip if True has side effects.
3931 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3932 return false;
3933
3934 // The last operand of a masked instruction may be glued.
3935 bool HasGlueOp = True->getGluedNode() != nullptr;
3936
3937 // The chain operand may exist either before the glued operands or in the last
3938 // position.
3939 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3940 bool HasChainOp =
3941 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3942
3943 if (HasChainOp) {
3944 // Avoid creating cycles in the DAG. We must ensure that none of the other
3945 // operands depend on True through its Chain.
3946 SmallVector<const SDNode *, 4> LoopWorklist;
3947 SmallPtrSet<const SDNode *, 16> Visited;
3948 LoopWorklist.push_back(False.getNode());
3949 LoopWorklist.push_back(Mask.getNode());
3950 LoopWorklist.push_back(VL.getNode());
3951 LoopWorklist.push_back(Glue.getNode());
3952 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3953 return false;
3954 }
3955
3956 // The vector policy operand may be present for masked intrinsics
3957 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3958 unsigned TrueVLIndex =
3959 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3960 SDValue TrueVL = True.getOperand(TrueVLIndex);
3961 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3962
3963 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3964 if (LHS == RHS)
3965 return LHS;
3966 if (isAllOnesConstant(LHS))
3967 return RHS;
3968 if (isAllOnesConstant(RHS))
3969 return LHS;
3970 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3971 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3972 if (!CLHS || !CRHS)
3973 return SDValue();
3974 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3975 };
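// E.g. (illustrative): a constant VL of 2 against an all-ones (VLMAX) VL yields 2;
// identical operands yield that operand; two different non-constant VLs yield
// SDValue(), which aborts the fold below.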
3976
3977 // Because N and True must have the same passthru operand (or True's operand
3978 // is implicit_def), the "effective" body is the minimum of their VLs.
3979 SDValue OrigVL = VL;
3980 VL = GetMinVL(TrueVL, VL);
3981 if (!VL)
3982 return false;
3983
3984 // Some operations produce different elementwise results depending on the
3985 // active elements, like viota.m or vredsum. This transformation is illegal
3986 // for these if we change the active elements (i.e. mask or VL).
3987 const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
3988 if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL))
3989 return false;
3990 if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) &&
3991 (Mask && !usesAllOnesMask(Mask, Glue)))
3992 return false;
3993
3994 // Make sure it doesn't raise any observable fp exceptions, since changing the
3995 // active elements will affect how fflags is set.
3996 if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
3997 return false;
3998
3999 SDLoc DL(N);
4000
4001 unsigned MaskedOpc = Info->MaskedPseudo;
4002#ifndef NDEBUG
4003 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
4005 "Expected instructions with mask have policy operand.");
4006 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
4007 MCOI::TIED_TO) == 0 &&
4008 "Expected instructions with mask have a tied dest.");
4009#endif
4010
4011 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
4012 // operand is undefined.
4013 //
4014 // However, if the VL became smaller than what the vmerge had originally, then
4015 // elements past VL that were previously in the vmerge's body will have moved
4016 // to the tail. In that case we always need to use tail undisturbed to
4017 // preserve them.
4018 bool MergeVLShrunk = VL != OrigVL;
4019 uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
4020 ? RISCVII::TAIL_AGNOSTIC
4021 : /*TUMU*/ 0;
4022 SDValue PolicyOp =
4023 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
4024
4025
4026 SmallVector<SDValue, 8> Ops;
4027 Ops.push_back(False);
4028
4029 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
4030 const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
4031 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
4032
4033 Ops.push_back(Mask);
4034
4035 // For an unmasked "VOp" with a rounding mode operand, the operand list is
4036 // (..., rm, vl) or (..., rm, vl, policy).
4037 // Its masked version is (..., vm, rm, vl, policy).
4038 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
4039 if (HasRoundingMode)
4040 Ops.push_back(True->getOperand(TrueVLIndex - 1));
4041
4042 Ops.append({VL, SEW, PolicyOp});
4043
4044 // Result node should have chain operand of True.
4045 if (HasChainOp)
4046 Ops.push_back(True.getOperand(TrueChainOpIdx));
4047
4048 // Add the glue for the CopyToReg of mask->v0.
4049 Ops.push_back(Glue);
4050
4051 MachineSDNode *Result =
4052 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
4053 Result->setFlags(True->getFlags());
4054
4055 if (!cast<MachineSDNode>(True)->memoperands_empty())
4056 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
4057
4058 // Replace vmerge.vvm node by Result.
4059 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
4060
4061 // Replace the other values of True, e.g. chain and VL.
4062 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
4063 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
4064
4065 return true;
4066}
4067
4068bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
4069 bool MadeChange = false;
4070 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4071
4072 while (Position != CurDAG->allnodes_begin()) {
4073 SDNode *N = &*--Position;
4074 if (N->use_empty() || !N->isMachineOpcode())
4075 continue;
4076
4077 if (IsVMerge(N))
4078 MadeChange |= performCombineVMergeAndVOps(N);
4079 }
4080 return MadeChange;
4081}
4082
4083/// If our passthru is an implicit_def, use noreg instead. This sidesteps
4084/// issues with MachineCSE not being able to CSE expressions with
4085/// IMPLICIT_DEF operands while preserving the semantic intent. See
4086/// pr64282 for context. Note that this transform is the last one
4087/// performed at ISEL DAG to DAG.
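/// Illustrative example: a vector pseudo whose passthru operand is an
/// IMPLICIT_DEF has that operand replaced by $noreg, letting MachineCSE match it
/// against otherwise-identical pseudos.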
4088bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4089 bool MadeChange = false;
4090 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4091
4092 while (Position != CurDAG->allnodes_begin()) {
4093 SDNode *N = &*--Position;
4094 if (N->use_empty() || !N->isMachineOpcode())
4095 continue;
4096
4097 const unsigned Opc = N->getMachineOpcode();
4098 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4099 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4100 !isImplicitDef(N->getOperand(0)))
4101 continue;
4102
4103 SmallVector<SDValue> Ops;
4104 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4105 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4106 SDValue Op = N->getOperand(I);
4107 Ops.push_back(Op);
4108 }
4109
4110 MachineSDNode *Result =
4111 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4112 Result->setFlags(N->getFlags());
4113 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4114 ReplaceUses(N, Result);
4115 MadeChange = true;
4116 }
4117 return MadeChange;
4118}
4119
4120
4121// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4122// for instruction scheduling.
4123FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4124 CodeGenOptLevel OptLevel) {
4125 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4126}
4127
4128char RISCVDAGToDAGISelLegacy::ID = 0;
4129
4130RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4131 CodeGenOptLevel OptLevel)
4132 : SelectionDAGISelLegacy(
4133 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4134