LLVM 20.0.0git
RISCVISelDAGToDAG.cpp
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMCTargetDesc.h"
16#include "MCTargetDesc/RISCVMatInt.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
21#include "llvm/Support/Alignment.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/MathExtras.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#include "RISCVGenSearchableTables.inc"
47} // namespace llvm::RISCV
48
49 void RISCVDAGToDAGISel::PreprocessISelDAG() {
50 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
51
52 bool MadeChange = false;
53 while (Position != CurDAG->allnodes_begin()) {
54 SDNode *N = &*--Position;
55 if (N->use_empty())
56 continue;
57
58 SDValue Result;
59 switch (N->getOpcode()) {
60 case ISD::SPLAT_VECTOR: {
61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63 MVT VT = N->getSimpleValueType(0);
64 unsigned Opc =
65 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66 SDLoc DL(N);
67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 SDValue Src = N->getOperand(0);
69 if (VT.isInteger())
70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71 N->getOperand(0));
72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73 break;
74 }
75 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77 // load. Done after lowering and combining so that we have a chance to
78 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
79 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 MVT VT = N->getSimpleValueType(0);
81 SDValue Passthru = N->getOperand(0);
82 SDValue Lo = N->getOperand(1);
83 SDValue Hi = N->getOperand(2);
84 SDValue VL = N->getOperand(3);
85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87 "Unexpected VTs!");
88 MachineFunction &MF = CurDAG->getMachineFunction();
89 SDLoc DL(N);
90
91 // Create temporary stack for each expanding node.
92 SDValue StackSlot =
93 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
96
97 SDValue Chain = CurDAG->getEntryNode();
98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100 SDValue OffsetSlot =
101 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103 Align(8));
104
105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108 SDValue IntID =
109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110 SDValue Ops[] = {Chain,
111 IntID,
112 Passthru,
113 StackSlot,
114 CurDAG->getRegister(RISCV::X0, MVT::i64),
115 VL};
116
117 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118 MVT::i64, MPI, Align(8),
119 MachineMemOperand::MOLoad);
120 break;
121 }
122 }
123
124 if (Result) {
125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126 LLVM_DEBUG(N->dump(CurDAG));
127 LLVM_DEBUG(dbgs() << "\nNew: ");
128 LLVM_DEBUG(Result->dump(CurDAG));
129 LLVM_DEBUG(dbgs() << "\n");
130
131 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132 MadeChange = true;
133 }
134 }
135
136 if (MadeChange)
137 CurDAG->RemoveDeadNodes();
138}
139
140 void RISCVDAGToDAGISel::PostprocessISelDAG() {
141 HandleSDNode Dummy(CurDAG->getRoot());
142 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
143
144 bool MadeChange = false;
145 while (Position != CurDAG->allnodes_begin()) {
146 SDNode *N = &*--Position;
147 // Skip dead nodes and any non-machine opcodes.
148 if (N->use_empty() || !N->isMachineOpcode())
149 continue;
150
151 MadeChange |= doPeepholeSExtW(N);
152
153 // FIXME: This is here only because the VMerge transform doesn't
154 // know how to handle masked true inputs. Once that has been moved
155 // to post-ISEL, this can be deleted as well.
156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157 }
158
159 CurDAG->setRoot(Dummy.getValue());
160
161 MadeChange |= doPeepholeMergeVVMFold();
162
163 // After we're done with everything else, convert IMPLICIT_DEF
164 // passthru operands to NoRegister. This is required to workaround
165 // an optimization deficiency in MachineCSE. This really should
166 // be merged back into each of the patterns (i.e. there's no good
167 // reason not to go directly to NoReg), but is being done this way
168 // to allow easy backporting.
169 MadeChange |= doPeepholeNoRegPassThru();
170
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
173}
174
175static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (const RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202}
203
204static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
207
208 // Use a rematerializable pseudo instruction for short sequences if enabled.
209 if (Seq.size() == 2 && UsePseudoMovImm)
210 return SDValue(
211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
213 0);
214
215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216 // worst an LUI+ADDIW. This will require an extra register, but avoids a
217 // constant pool.
218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219 // low and high 32 bits are the same and bit 31 and 63 are set.
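// Illustrative example (values and register names chosen for this note, not
// from the original comment): 0x1234567812345678 can be built as
//   lui a0, 0x12345; addiw a0, a0, 0x678   ; low 32 bits
//   slli a1, a0, 32; add a0, a0, a1
// i.e. four instructions plus one extra register, instead of a longer
// LUI/ADDI/SLLI chain.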
220 if (Seq.size() > 3) {
221 unsigned ShiftAmt, AddOpc;
222 RISCVMatInt::InstSeq SeqLo =
223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226
227 SDValue SLLI = SDValue(
228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230 0);
231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232 }
233 }
234
235 // Otherwise, use the original sequence.
236 return selectImmSeq(CurDAG, DL, VT, Seq);
237}
238
239 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
240 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
241 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
242 bool IsLoad, MVT *IndexVT) {
243 SDValue Chain = Node->getOperand(0);
244 SDValue Glue;
245
246 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
247
248 if (IsStridedOrIndexed) {
249 Operands.push_back(Node->getOperand(CurOp++)); // Index.
250 if (IndexVT)
251 *IndexVT = Operands.back()->getSimpleValueType(0);
252 }
253
254 if (IsMasked) {
255 // Mask needs to be copied to V0.
256 SDValue Mask = Node->getOperand(CurOp++);
257 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
258 Glue = Chain.getValue(1);
259 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
260 }
261 SDValue VL;
262 selectVLOp(Node->getOperand(CurOp++), VL);
263 Operands.push_back(VL);
264
265 MVT XLenVT = Subtarget->getXLenVT();
266 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
267 Operands.push_back(SEWOp);
268
269 // At the IR layer, all the masked load intrinsics have policy operands,
270 // none of the others do. All have passthru operands. For our pseudos,
271 // all loads have policy operands.
272 if (IsLoad) {
273 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
274 if (IsMasked)
275 Policy = Node->getConstantOperandVal(CurOp++);
276 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
277 Operands.push_back(PolicyOp);
278 }
279
280 Operands.push_back(Chain); // Chain.
281 if (Glue)
282 Operands.push_back(Glue);
283}
284
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
290 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
291
292 unsigned CurOp = 2;
293 SmallVector<SDValue, 8> Operands;
294
295 Operands.push_back(Node->getOperand(CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302 static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
305
306 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
307 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
308
309 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
310 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
311 CurDAG->RemoveDeadNode(Node);
312}
313
314 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
315 bool IsMasked) {
316 SDLoc DL(Node);
317 MVT VT = Node->getSimpleValueType(0);
318 MVT XLenVT = Subtarget->getXLenVT();
319 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
320 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
321
322 unsigned CurOp = 2;
323 SmallVector<SDValue, 8> Operands;
324
325 Operands.push_back(Node->getOperand(CurOp++));
326
327 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
328 /*IsStridedOrIndexed*/ false, Operands,
329 /*IsLoad=*/true);
330
331 const RISCV::VLSEGPseudo *P =
332 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
333 Log2SEW, static_cast<unsigned>(LMUL));
334 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
335 XLenVT, MVT::Other, Operands);
336
337 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
338 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
339
340 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
341 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
342 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
343 CurDAG->RemoveDeadNode(Node);
344}
345
346void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
347 bool IsOrdered) {
348 SDLoc DL(Node);
349 MVT VT = Node->getSimpleValueType(0);
350 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
351 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
352
353 unsigned CurOp = 2;
354 SmallVector<SDValue, 8> Operands;
355
356 Operands.push_back(Node->getOperand(CurOp++));
357
358 MVT IndexVT;
359 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
360 /*IsStridedOrIndexed*/ true, Operands,
361 /*IsLoad=*/true, &IndexVT);
362
363#ifndef NDEBUG
364 // Number of elements = RVVBitsPerBlock * LMUL / SEW
365 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
366 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
367 if (DecodedLMUL.second)
368 ContainedTyNumElts /= DecodedLMUL.first;
369 else
370 ContainedTyNumElts *= DecodedLMUL.first;
371 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
372 "Element count mismatch");
373#endif
374
375 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
376 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
377 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
378 report_fatal_error("The V extension does not support EEW=64 for index "
379 "values when XLEN=32");
380 }
381 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
382 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
383 static_cast<unsigned>(IndexLMUL));
384 MachineSDNode *Load =
385 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
386
387 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
388 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
389
390 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
391 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
392 CurDAG->RemoveDeadNode(Node);
393}
394
395void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
396 bool IsStrided) {
397 SDLoc DL(Node);
398 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
399 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
400 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
401
402 unsigned CurOp = 2;
403 SmallVector<SDValue, 8> Operands;
404
405 Operands.push_back(Node->getOperand(CurOp++));
406
407 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
408 Operands);
409
410 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
411 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
412 MachineSDNode *Store =
413 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
414
415 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
416 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
417
418 ReplaceNode(Node, Store);
419}
420
421void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
422 bool IsOrdered) {
423 SDLoc DL(Node);
424 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
425 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
426 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
427
428 unsigned CurOp = 2;
429 SmallVector<SDValue, 8> Operands;
430
431 Operands.push_back(Node->getOperand(CurOp++));
432
433 MVT IndexVT;
434 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
435 /*IsStridedOrIndexed*/ true, Operands,
436 /*IsLoad=*/false, &IndexVT);
437
438#ifndef NDEBUG
439 // Number of elements = RVVBitsPerBlock * LMUL / SEW
440 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
441 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
442 if (DecodedLMUL.second)
443 ContainedTyNumElts /= DecodedLMUL.first;
444 else
445 ContainedTyNumElts *= DecodedLMUL.first;
446 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
447 "Element count mismatch");
448#endif
449
450 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
451 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
452 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
453 report_fatal_error("The V extension does not support EEW=64 for index "
454 "values when XLEN=32");
455 }
456 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
457 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
458 static_cast<unsigned>(IndexLMUL));
459 MachineSDNode *Store =
460 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
461
462 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
463 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
464
465 ReplaceNode(Node, Store);
466}
467
468 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
469 if (!Subtarget->hasVInstructions())
470 return;
471
472 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
473
474 SDLoc DL(Node);
475 MVT XLenVT = Subtarget->getXLenVT();
476
477 unsigned IntNo = Node->getConstantOperandVal(0);
478
479 assert((IntNo == Intrinsic::riscv_vsetvli ||
480 IntNo == Intrinsic::riscv_vsetvlimax) &&
481 "Unexpected vsetvli intrinsic");
482
483 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
484 unsigned Offset = (VLMax ? 1 : 2);
485
486 assert(Node->getNumOperands() == Offset + 2 &&
487 "Unexpected number of operands");
488
489 unsigned SEW =
490 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
491 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
492 Node->getConstantOperandVal(Offset + 1) & 0x7);
493
494 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
495 /*MaskAgnostic*/ true);
496 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
497
498 SDValue VLOperand;
499 unsigned Opcode = RISCV::PseudoVSETVLI;
500 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
501 if (auto VLEN = Subtarget->getRealVLen())
502 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
503 VLMax = true;
504 }
505 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
506 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
507 Opcode = RISCV::PseudoVSETVLIX0;
508 } else {
509 VLOperand = Node->getOperand(1);
510
511 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
512 uint64_t AVL = C->getZExtValue();
513 if (isUInt<5>(AVL)) {
514 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
515 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
516 XLenVT, VLImm, VTypeIOp));
517 return;
518 }
519 }
520 }
521
522 ReplaceNode(Node,
523 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
524}
525
526 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
527 MVT VT = Node->getSimpleValueType(0);
528 unsigned Opcode = Node->getOpcode();
529 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
530 "Unexpected opcode");
531 SDLoc DL(Node);
532
533 // For operations of the form (x << C1) op C2, check if we can use
534 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
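// Illustrative example: (or (shl x, 16), 0xA0000) cannot use ORI directly
// because 0xA0000 is not a simm12, but 0xA0000 >> 16 == 0xA is, so it can be
// selected as (slli (ori x, 0xA), 16).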
535 SDValue N0 = Node->getOperand(0);
536 SDValue N1 = Node->getOperand(1);
537
538 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
539 if (!Cst)
540 return false;
541
542 int64_t Val = Cst->getSExtValue();
543
544 // Check if immediate can already use ANDI/ORI/XORI.
545 if (isInt<12>(Val))
546 return false;
547
548 SDValue Shift = N0;
549
550 // If Val is simm32 and we have a sext_inreg from i32, then the binop
551 // produces at least 33 sign bits. We can peek through the sext_inreg and use
552 // a SLLIW at the end.
553 bool SignExt = false;
554 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
555 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
556 SignExt = true;
557 Shift = N0.getOperand(0);
558 }
559
560 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
561 return false;
562
563 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
564 if (!ShlCst)
565 return false;
566
567 uint64_t ShAmt = ShlCst->getZExtValue();
568
569 // Make sure that we don't change the operation by removing bits.
570 // This only matters for OR and XOR, AND is unaffected.
571 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
572 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
573 return false;
574
575 int64_t ShiftedVal = Val >> ShAmt;
576 if (!isInt<12>(ShiftedVal))
577 return false;
578
579 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
580 if (SignExt && ShAmt >= 32)
581 return false;
582
583 // Ok, we can reorder to get a smaller immediate.
584 unsigned BinOpc;
585 switch (Opcode) {
586 default: llvm_unreachable("Unexpected opcode");
587 case ISD::AND: BinOpc = RISCV::ANDI; break;
588 case ISD::OR: BinOpc = RISCV::ORI; break;
589 case ISD::XOR: BinOpc = RISCV::XORI; break;
590 }
591
592 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
593
594 SDNode *BinOp = CurDAG->getMachineNode(
595 BinOpc, DL, VT, Shift.getOperand(0),
596 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
597 SDNode *SLLI =
598 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
599 CurDAG->getTargetConstant(ShAmt, DL, VT));
600 ReplaceNode(Node, SLLI);
601 return true;
602}
603
604 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
605 // Only supported with XTHeadBb at the moment.
606 if (!Subtarget->hasVendorXTHeadBb())
607 return false;
608
609 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
610 if (!N1C)
611 return false;
612
613 SDValue N0 = Node->getOperand(0);
614 if (!N0.hasOneUse())
615 return false;
616
617 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
618 MVT VT) {
619 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
620 CurDAG->getTargetConstant(Msb, DL, VT),
621 CurDAG->getTargetConstant(Lsb, DL, VT));
622 };
623
624 SDLoc DL(Node);
625 MVT VT = Node->getSimpleValueType(0);
626 const unsigned RightShAmt = N1C->getZExtValue();
627
628 // Transform (sra (shl X, C1) C2) with C1 < C2
629 // -> (TH.EXT X, msb, lsb)
630 if (N0.getOpcode() == ISD::SHL) {
631 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
632 if (!N01C)
633 return false;
634
635 const unsigned LeftShAmt = N01C->getZExtValue();
636 // Make sure that this is a bitfield extraction (i.e., the shift-right
637 // amount can not be less than the left-shift).
638 if (LeftShAmt > RightShAmt)
639 return false;
640
641 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
642 const unsigned Msb = MsbPlusOne - 1;
643 const unsigned Lsb = RightShAmt - LeftShAmt;
644
645 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
646 ReplaceNode(Node, TH_EXT);
647 return true;
648 }
649
650 // Transform (sra (sext_inreg X, _), C) ->
651 // (TH.EXT X, msb, lsb)
652 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
653 unsigned ExtSize =
654 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
655
656 // ExtSize of 32 should use sraiw via tablegen pattern.
657 if (ExtSize == 32)
658 return false;
659
660 const unsigned Msb = ExtSize - 1;
661 const unsigned Lsb = RightShAmt;
662
663 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
664 ReplaceNode(Node, TH_EXT);
665 return true;
666 }
667
668 return false;
669}
670
671 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
672 // Target does not support indexed loads.
673 if (!Subtarget->hasVendorXTHeadMemIdx())
674 return false;
675
676 LoadSDNode *Ld = cast<LoadSDNode>(Node);
677 ISD::MemIndexedMode AM = Ld->getAddressingMode();
678 if (AM == ISD::UNINDEXED)
679 return false;
680
681 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
682 if (!C)
683 return false;
684
685 EVT LoadVT = Ld->getMemoryVT();
686 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
687 "Unexpected addressing mode");
688 bool IsPre = AM == ISD::PRE_INC;
689 bool IsPost = AM == ISD::POST_INC;
690 int64_t Offset = C->getSExtValue();
691
692 // The constants that can be encoded in the THeadMemIdx instructions
693 // are of the form (sign_extend(imm5) << imm2).
694 unsigned Shift;
695 for (Shift = 0; Shift < 4; Shift++)
696 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
697 break;
698
699 // Constant cannot be encoded.
700 if (Shift == 4)
701 return false;
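// Illustrative examples: Offset == 96 is encodable as 12 << 3 (imm5 = 12,
// imm2 = 3), while Offset == 100 is not (100 >> 3 fits in 5 bits but 100 is
// not a multiple of 8), so Shift reaches 4 and we bail out above.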
702
703 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
704 unsigned Opcode;
705 if (LoadVT == MVT::i8 && IsPre)
706 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
707 else if (LoadVT == MVT::i8 && IsPost)
708 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
709 else if (LoadVT == MVT::i16 && IsPre)
710 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
711 else if (LoadVT == MVT::i16 && IsPost)
712 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
713 else if (LoadVT == MVT::i32 && IsPre)
714 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
715 else if (LoadVT == MVT::i32 && IsPost)
716 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
717 else if (LoadVT == MVT::i64 && IsPre)
718 Opcode = RISCV::TH_LDIB;
719 else if (LoadVT == MVT::i64 && IsPost)
720 Opcode = RISCV::TH_LDIA;
721 else
722 return false;
723
724 EVT Ty = Ld->getOffset().getValueType();
725 SDValue Ops[] = {
726 Ld->getBasePtr(),
727 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
728 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
729 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
730 Ld->getValueType(1), MVT::Other, Ops);
731
732 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
733 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
734
735 ReplaceNode(Node, New);
736
737 return true;
738}
739
740 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
741 if (!Subtarget->hasVInstructions())
742 return;
743
744 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
745
746 SDLoc DL(Node);
747 unsigned IntNo = Node->getConstantOperandVal(1);
748
749 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
750 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
751 "Unexpected vsetvli intrinsic");
752
753 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
754 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
755 SDValue SEWOp =
756 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
757 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
758 Node->getOperand(4), Node->getOperand(5),
759 Node->getOperand(8), SEWOp,
760 Node->getOperand(0)};
761
762 unsigned Opcode;
763 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
764 switch (LMulSDNode->getSExtValue()) {
765 case 5:
766 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
767 : RISCV::PseudoVC_I_SE_MF8;
768 break;
769 case 6:
770 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
771 : RISCV::PseudoVC_I_SE_MF4;
772 break;
773 case 7:
774 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
775 : RISCV::PseudoVC_I_SE_MF2;
776 break;
777 case 0:
778 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
779 : RISCV::PseudoVC_I_SE_M1;
780 break;
781 case 1:
782 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
783 : RISCV::PseudoVC_I_SE_M2;
784 break;
785 case 2:
786 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
787 : RISCV::PseudoVC_I_SE_M4;
788 break;
789 case 3:
790 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
791 : RISCV::PseudoVC_I_SE_M8;
792 break;
793 }
794
795 ReplaceNode(Node, CurDAG->getMachineNode(
796 Opcode, DL, Node->getSimpleValueType(0), Operands));
797}
798
799static unsigned getSegInstNF(unsigned Intrinsic) {
800#define INST_NF_CASE(NAME, NF) \
801 case Intrinsic::riscv_##NAME##NF: \
802 return NF;
803#define INST_NF_CASE_MASK(NAME, NF) \
804 case Intrinsic::riscv_##NAME##NF##_mask: \
805 return NF;
806#define INST_NF_CASE_FF(NAME, NF) \
807 case Intrinsic::riscv_##NAME##NF##ff: \
808 return NF;
809#define INST_NF_CASE_FF_MASK(NAME, NF) \
810 case Intrinsic::riscv_##NAME##NF##ff_mask: \
811 return NF;
812#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
813 MACRO_NAME(NAME, 2) \
814 MACRO_NAME(NAME, 3) \
815 MACRO_NAME(NAME, 4) \
816 MACRO_NAME(NAME, 5) \
817 MACRO_NAME(NAME, 6) \
818 MACRO_NAME(NAME, 7) \
819 MACRO_NAME(NAME, 8)
820#define INST_ALL_NF_CASE(NAME) \
821 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
822 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
823#define INST_ALL_NF_CASE_WITH_FF(NAME) \
824 INST_ALL_NF_CASE(NAME) \
825 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
826 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
827 switch (Intrinsic) {
828 default:
829 llvm_unreachable("Unexpected segment load/store intrinsic");
830 INST_ALL_NF_CASE_WITH_FF(vlseg)
831 INST_ALL_NF_CASE(vlsseg)
832 INST_ALL_NF_CASE(vloxseg)
833 INST_ALL_NF_CASE(vluxseg)
834 INST_ALL_NF_CASE(vsseg)
835 INST_ALL_NF_CASE(vssseg)
836 INST_ALL_NF_CASE(vsoxseg)
837 INST_ALL_NF_CASE(vsuxseg)
838 }
839}
840
841 void RISCVDAGToDAGISel::Select(SDNode *Node) {
842 // If we have a custom node, we have already selected.
843 if (Node->isMachineOpcode()) {
844 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
845 Node->setNodeId(-1);
846 return;
847 }
848
849 // Instruction Selection not handled by the auto-generated tablegen selection
850 // should be handled here.
851 unsigned Opcode = Node->getOpcode();
852 MVT XLenVT = Subtarget->getXLenVT();
853 SDLoc DL(Node);
854 MVT VT = Node->getSimpleValueType(0);
855
856 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
857
858 switch (Opcode) {
859 case ISD::Constant: {
860 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
861 auto *ConstNode = cast<ConstantSDNode>(Node);
862 if (ConstNode->isZero()) {
863 SDValue New =
864 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
865 ReplaceNode(Node, New.getNode());
866 return;
867 }
868 int64_t Imm = ConstNode->getSExtValue();
869 // If only the lower 8 bits are used, try to convert this to a simm6 by
870 // sign-extending bit 7. This is neutral without the C extension, and
871 // allows C.LI to be used if C is present.
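// Illustrative example: Imm == 0xF0 with only byte-wide users becomes -16,
// which fits in a simm6 and can therefore be materialized with C.LI.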
872 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
873 Imm = SignExtend64<8>(Imm);
874 // If the upper XLen-16 bits are not used, try to convert this to a simm12
875 // by sign extending bit 15.
876 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
877 hasAllHUsers(Node))
878 Imm = SignExtend64<16>(Imm);
879 // If the upper 32-bits are not used try to convert this into a simm32 by
880 // sign extending bit 32.
881 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
882 Imm = SignExtend64<32>(Imm);
883
884 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
885 return;
886 }
887 case ISD::ConstantFP: {
888 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
889
890 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
891 SDValue Imm;
892 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
893 // create an integer immediate.
894 if (APF.isPosZero() || NegZeroF64)
895 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
896 else
897 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
898 *Subtarget);
899
900 bool HasZdinx = Subtarget->hasStdExtZdinx();
901 bool Is64Bit = Subtarget->is64Bit();
902 unsigned Opc;
903 switch (VT.SimpleTy) {
904 default:
905 llvm_unreachable("Unexpected size");
906 case MVT::bf16:
907 assert(Subtarget->hasStdExtZfbfmin());
908 Opc = RISCV::FMV_H_X;
909 break;
910 case MVT::f16:
911 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
912 break;
913 case MVT::f32:
914 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
915 break;
916 case MVT::f64:
917 // For RV32, we can't move from a GPR, we need to convert instead. This
918 // should only happen for +0.0 and -0.0.
919 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
920 if (Is64Bit)
921 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
922 else
923 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
924 break;
925 }
926
927 SDNode *Res;
928 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
929 Res =
930 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
931 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
932 Res =
933 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
934 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
935 Res = CurDAG->getMachineNode(
936 Opc, DL, VT, Imm,
937 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
938 else
939 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
940
941 // For f64 -0.0, we need to insert a fneg.d idiom.
942 if (NegZeroF64) {
943 Opc = RISCV::FSGNJN_D;
944 if (HasZdinx)
945 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
946 Res =
947 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
948 }
949
950 ReplaceNode(Node, Res);
951 return;
952 }
953 case RISCVISD::BuildGPRPair:
954 case RISCVISD::BuildPairF64: {
955 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
956 break;
957
958 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
959 "BuildPairF64 only handled here on rv32i_zdinx");
960
961 SDValue Ops[] = {
962 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
963 Node->getOperand(0),
964 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
965 Node->getOperand(1),
966 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
967
968 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
969 ReplaceNode(Node, N);
970 return;
971 }
972 case RISCVISD::SplitGPRPair:
973 case RISCVISD::SplitF64: {
974 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
975 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
976 "SplitF64 only handled here on rv32i_zdinx");
977
978 if (!SDValue(Node, 0).use_empty()) {
979 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
980 Node->getValueType(0),
981 Node->getOperand(0));
982 ReplaceUses(SDValue(Node, 0), Lo);
983 }
984
985 if (!SDValue(Node, 1).use_empty()) {
986 SDValue Hi = CurDAG->getTargetExtractSubreg(
987 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
988 ReplaceUses(SDValue(Node, 1), Hi);
989 }
990
991 CurDAG->RemoveDeadNode(Node);
992 return;
993 }
994
995 assert(Opcode != RISCVISD::SplitGPRPair &&
996 "SplitGPRPair should already be handled");
997
998 if (!Subtarget->hasStdExtZfa())
999 break;
1000 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1001 "Unexpected subtarget");
1002
1003 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1004 if (!SDValue(Node, 0).use_empty()) {
1005 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1006 Node->getOperand(0));
1007 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1008 }
1009 if (!SDValue(Node, 1).use_empty()) {
1010 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1011 Node->getOperand(0));
1012 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1013 }
1014
1015 CurDAG->RemoveDeadNode(Node);
1016 return;
1017 }
1018 case ISD::SHL: {
1019 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1020 if (!N1C)
1021 break;
1022 SDValue N0 = Node->getOperand(0);
1023 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1024 !isa<ConstantSDNode>(N0.getOperand(1)))
1025 break;
1026 unsigned ShAmt = N1C->getZExtValue();
1027 uint64_t Mask = N0.getConstantOperandVal(1);
1028
1029 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1030 unsigned XLen = Subtarget->getXLen();
1031 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1032 unsigned TrailingZeros = llvm::countr_zero(Mask);
1033 if (TrailingZeros > 0 && LeadingZeros == 32) {
1034 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1035 // where C2 has 32 leading zeros and C3 trailing zeros.
1036 SDNode *SRLIW = CurDAG->getMachineNode(
1037 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1038 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1039 SDNode *SLLI = CurDAG->getMachineNode(
1040 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1041 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1042 ReplaceNode(Node, SLLI);
1043 return;
1044 }
1045 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1046 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1047 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1048 // where C2 has C4 leading zeros and no trailing zeros.
1049 // This is profitable if the "and" was to be lowered to
1050 // (srli (slli X, C4), C4) and not (andi X, C2).
1051 // For "LeadingZeros == 32":
1052 // - with Zba it's just (slli.uw X, C)
1053 // - without Zba a tablegen pattern applies the very same
1054 // transform as we would have done here
1055 SDNode *SLLI = CurDAG->getMachineNode(
1056 RISCV::SLLI, DL, VT, N0->getOperand(0),
1057 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1058 SDNode *SRLI = CurDAG->getMachineNode(
1059 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1060 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1061 ReplaceNode(Node, SRLI);
1062 return;
1063 }
1064 }
1065 break;
1066 }
1067 case ISD::SRL: {
1068 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1069 if (!N1C)
1070 break;
1071 SDValue N0 = Node->getOperand(0);
1072 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1073 break;
1074 unsigned ShAmt = N1C->getZExtValue();
1075 uint64_t Mask = N0.getConstantOperandVal(1);
1076
1077 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1078 // 32 leading zeros and C3 trailing zeros.
1079 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1080 unsigned XLen = Subtarget->getXLen();
1081 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1082 unsigned TrailingZeros = llvm::countr_zero(Mask);
1083 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1084 SDNode *SRLIW = CurDAG->getMachineNode(
1085 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1086 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1087 SDNode *SLLI = CurDAG->getMachineNode(
1088 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1089 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1090 ReplaceNode(Node, SLLI);
1091 return;
1092 }
1093 }
1094
1095 // Optimize (srl (and X, C2), C) ->
1096 // (srli (slli X, XLen-C3), (XLen-C3) + C)
1097 // Where C2 is a mask with C3 trailing ones.
1098 // Taking into account that the C2 may have had lower bits unset by
1099 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1100 // This pattern occurs when type legalizing right shifts for types with
1101 // less than XLen bits.
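// Illustrative example (RV64, no XTHeadBb): (srl (and X, 0xFFF8), 3) ends up
// with Mask == 0xFFFF (16 trailing ones), which selects to
// (srli (slli X, 48), 51) without materializing the mask constant.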
1102 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1103 if (!isMask_64(Mask))
1104 break;
1105 unsigned TrailingOnes = llvm::countr_one(Mask);
1106 if (ShAmt >= TrailingOnes)
1107 break;
1108 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1109 if (TrailingOnes == 32) {
1110 SDNode *SRLI = CurDAG->getMachineNode(
1111 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1112 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1113 ReplaceNode(Node, SRLI);
1114 return;
1115 }
1116
1117 // Only do the remaining transforms if the AND has one use.
1118 if (!N0.hasOneUse())
1119 break;
1120
1121 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1122 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1123 SDNode *BEXTI = CurDAG->getMachineNode(
1124 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1125 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1126 ReplaceNode(Node, BEXTI);
1127 return;
1128 }
1129
1130 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1131 if (Subtarget->hasVendorXTHeadBb()) {
1132 SDNode *THEXTU = CurDAG->getMachineNode(
1133 RISCV::TH_EXTU, DL, VT, N0->getOperand(0),
1134 CurDAG->getTargetConstant(TrailingOnes - 1, DL, VT),
1135 CurDAG->getTargetConstant(ShAmt, DL, VT));
1136 ReplaceNode(Node, THEXTU);
1137 return;
1138 }
1139
1140 SDNode *SLLI =
1141 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1142 CurDAG->getTargetConstant(LShAmt, DL, VT));
1143 SDNode *SRLI = CurDAG->getMachineNode(
1144 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1145 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1146 ReplaceNode(Node, SRLI);
1147 return;
1148 }
1149 case ISD::SRA: {
1150 if (trySignedBitfieldExtract(Node))
1151 return;
1152
1153 // Optimize (sra (sext_inreg X, i16), C) ->
1154 // (srai (slli X, XLen-16), (XLen-16) + C)
1155 // And (sra (sext_inreg X, i8), C) ->
1156 // (srai (slli X, XLen-8), (XLen-8) + C)
1157 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1158 // This transform matches the code we get without Zbb. The shifts are more
1159 // compressible, and this can help expose CSE opportunities in the sdiv by
1160 // constant optimization.
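// Illustrative example (RV64): (sra (sext_inreg X, i8), 3) becomes
// (srai (slli X, 56), 59), matching what the non-Zbb lowering would produce.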
1161 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1162 if (!N1C)
1163 break;
1164 SDValue N0 = Node->getOperand(0);
1165 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1166 break;
1167 unsigned ShAmt = N1C->getZExtValue();
1168 unsigned ExtSize =
1169 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1170 // ExtSize of 32 should use sraiw via tablegen pattern.
1171 if (ExtSize >= 32 || ShAmt >= ExtSize)
1172 break;
1173 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1174 SDNode *SLLI =
1175 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1176 CurDAG->getTargetConstant(LShAmt, DL, VT));
1177 SDNode *SRAI = CurDAG->getMachineNode(
1178 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1179 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1180 ReplaceNode(Node, SRAI);
1181 return;
1182 }
1183 case ISD::OR:
1184 case ISD::XOR:
1185 if (tryShrinkShlLogicImm(Node))
1186 return;
1187
1188 break;
1189 case ISD::AND: {
1190 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1191 if (!N1C)
1192 break;
1193
1194 SDValue N0 = Node->getOperand(0);
1195
1196 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1197 SDValue X, unsigned Msb,
1198 unsigned Lsb) {
1199 if (!Subtarget->hasVendorXTHeadBb())
1200 return false;
1201
1202 SDNode *TH_EXTU = CurDAG->getMachineNode(
1203 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1204 CurDAG->getTargetConstant(Lsb, DL, VT));
1205 ReplaceNode(Node, TH_EXTU);
1206 return true;
1207 };
1208
1209 bool LeftShift = N0.getOpcode() == ISD::SHL;
1210 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1211 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1212 if (!C)
1213 break;
1214 unsigned C2 = C->getZExtValue();
1215 unsigned XLen = Subtarget->getXLen();
1216 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1217
1218 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1219 // shift pair might offer more compression opportunities.
1220 // TODO: We could check for C extension here, but we don't have many lit
1221 // tests with the C extension enabled so not checking gets better
1222 // coverage.
1223 // TODO: What if ANDI faster than shift?
1224 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1225
1226 uint64_t C1 = N1C->getZExtValue();
1227
1228 // Clear irrelevant bits in the mask.
1229 if (LeftShift)
1230 C1 &= maskTrailingZeros<uint64_t>(C2);
1231 else
1232 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1233
1234 // Some transforms should only be done if the shift has a single use or
1235 // the AND would become (srli (slli X, 32), 32)
1236 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1237
1238 SDValue X = N0.getOperand(0);
1239
1240 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1241 // with c3 leading zeros.
1242 if (!LeftShift && isMask_64(C1)) {
1243 unsigned Leading = XLen - llvm::bit_width(C1);
1244 if (C2 < Leading) {
1245 // If the number of leading zeros is C2+32 this can be SRLIW.
1246 if (C2 + 32 == Leading) {
1247 SDNode *SRLIW = CurDAG->getMachineNode(
1248 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1249 ReplaceNode(Node, SRLIW);
1250 return;
1251 }
1252
1253 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1254 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1255 //
1256 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1257 // legalized and goes through DAG combine.
1258 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1259 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1260 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1261 SDNode *SRAIW =
1262 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1263 CurDAG->getTargetConstant(31, DL, VT));
1264 SDNode *SRLIW = CurDAG->getMachineNode(
1265 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1266 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1267 ReplaceNode(Node, SRLIW);
1268 return;
1269 }
1270
1271 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1272 // available.
1273 // Transform (and (srl x, C2), C1)
1274 // -> (<bfextract> x, msb, lsb)
1275 //
1276 // Make sure to keep this below the SRLIW cases, as we always want to
1277 // prefer the more common instruction.
1278 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1279 const unsigned Lsb = C2;
1280 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1281 return;
1282
1283 // (srli (slli x, c3-c2), c3).
1284 // Skip if we could use (zext.w (sraiw X, C2)).
1285 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1288 // Also Skip if we can use bexti or th.tst.
1289 Skip |= HasBitTest && Leading == XLen - 1;
1290 if (OneUseOrZExtW && !Skip) {
1291 SDNode *SLLI = CurDAG->getMachineNode(
1292 RISCV::SLLI, DL, VT, X,
1293 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1294 SDNode *SRLI = CurDAG->getMachineNode(
1295 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1296 CurDAG->getTargetConstant(Leading, DL, VT));
1297 ReplaceNode(Node, SRLI);
1298 return;
1299 }
1300 }
1301 }
1302
1303 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1304 // shifted by c2 bits with c3 leading zeros.
1305 if (LeftShift && isShiftedMask_64(C1)) {
1306 unsigned Leading = XLen - llvm::bit_width(C1);
1307
1308 if (C2 + Leading < XLen &&
1309 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1310 // Use slli.uw when possible.
1311 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1312 SDNode *SLLI_UW =
1313 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1314 CurDAG->getTargetConstant(C2, DL, VT));
1315 ReplaceNode(Node, SLLI_UW);
1316 return;
1317 }
1318
1319 // (srli (slli x, c2+c3), c3)
1320 if (OneUseOrZExtW && !IsCANDI) {
1321 SDNode *SLLI = CurDAG->getMachineNode(
1322 RISCV::SLLI, DL, VT, X,
1323 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1324 SDNode *SRLI = CurDAG->getMachineNode(
1325 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1326 CurDAG->getTargetConstant(Leading, DL, VT));
1327 ReplaceNode(Node, SRLI);
1328 return;
1329 }
1330 }
1331 }
1332
1333 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1334 // shifted mask with c2 leading zeros and c3 trailing zeros.
1335 if (!LeftShift && isShiftedMask_64(C1)) {
1336 unsigned Leading = XLen - llvm::bit_width(C1);
1337 unsigned Trailing = llvm::countr_zero(C1);
1338 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1339 !IsCANDI) {
1340 unsigned SrliOpc = RISCV::SRLI;
1341 // If the input is zexti32 we should use SRLIW.
1342 if (X.getOpcode() == ISD::AND &&
1343 isa<ConstantSDNode>(X.getOperand(1)) &&
1344 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1345 SrliOpc = RISCV::SRLIW;
1346 X = X.getOperand(0);
1347 }
1348 SDNode *SRLI = CurDAG->getMachineNode(
1349 SrliOpc, DL, VT, X,
1350 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1351 SDNode *SLLI = CurDAG->getMachineNode(
1352 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1353 CurDAG->getTargetConstant(Trailing, DL, VT));
1354 ReplaceNode(Node, SLLI);
1355 return;
1356 }
1357 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1358 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1359 OneUseOrZExtW && !IsCANDI) {
1360 SDNode *SRLIW = CurDAG->getMachineNode(
1361 RISCV::SRLIW, DL, VT, X,
1362 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1363 SDNode *SLLI = CurDAG->getMachineNode(
1364 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1365 CurDAG->getTargetConstant(Trailing, DL, VT));
1366 ReplaceNode(Node, SLLI);
1367 return;
1368 }
1369 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1370 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1371 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1372 SDNode *SRLI = CurDAG->getMachineNode(
1373 RISCV::SRLI, DL, VT, X,
1374 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1375 SDNode *SLLI_UW = CurDAG->getMachineNode(
1376 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1377 CurDAG->getTargetConstant(Trailing, DL, VT));
1378 ReplaceNode(Node, SLLI_UW);
1379 return;
1380 }
1381 }
1382
1383 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1384 // shifted mask with no leading zeros and c3 trailing zeros.
1385 if (LeftShift && isShiftedMask_64(C1)) {
1386 unsigned Leading = XLen - llvm::bit_width(C1);
1387 unsigned Trailing = llvm::countr_zero(C1);
1388 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1389 SDNode *SRLI = CurDAG->getMachineNode(
1390 RISCV::SRLI, DL, VT, X,
1391 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1392 SDNode *SLLI = CurDAG->getMachineNode(
1393 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1394 CurDAG->getTargetConstant(Trailing, DL, VT));
1395 ReplaceNode(Node, SLLI);
1396 return;
1397 }
1398 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1399 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1400 SDNode *SRLIW = CurDAG->getMachineNode(
1401 RISCV::SRLIW, DL, VT, X,
1402 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1403 SDNode *SLLI = CurDAG->getMachineNode(
1404 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1405 CurDAG->getTargetConstant(Trailing, DL, VT));
1406 ReplaceNode(Node, SLLI);
1407 return;
1408 }
1409
1410 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1411 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1412 Subtarget->hasStdExtZba()) {
1413 SDNode *SRLI = CurDAG->getMachineNode(
1414 RISCV::SRLI, DL, VT, X,
1415 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1416 SDNode *SLLI_UW = CurDAG->getMachineNode(
1417 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1418 CurDAG->getTargetConstant(Trailing, DL, VT));
1419 ReplaceNode(Node, SLLI_UW);
1420 return;
1421 }
1422 }
1423 }
1424
1425 const uint64_t C1 = N1C->getZExtValue();
1426
1427 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1428 N0.hasOneUse()) {
1429 unsigned C2 = N0.getConstantOperandVal(1);
1430 unsigned XLen = Subtarget->getXLen();
1431 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1432
1433 SDValue X = N0.getOperand(0);
1434
1435 // Prefer SRAIW + ANDI when possible.
1436 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1437 X.getOpcode() == ISD::SHL &&
1438 isa<ConstantSDNode>(X.getOperand(1)) &&
1439 X.getConstantOperandVal(1) == 32;
1440 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1441 // mask with c3 leading zeros and c2 is larger than c3.
1442 if (isMask_64(C1) && !Skip) {
1443 unsigned Leading = XLen - llvm::bit_width(C1);
1444 if (C2 > Leading) {
1445 SDNode *SRAI = CurDAG->getMachineNode(
1446 RISCV::SRAI, DL, VT, X,
1447 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1448 SDNode *SRLI = CurDAG->getMachineNode(
1449 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1450 CurDAG->getTargetConstant(Leading, DL, VT));
1451 ReplaceNode(Node, SRLI);
1452 return;
1453 }
1454 }
1455
1456 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1457 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1458 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1459 if (isShiftedMask_64(C1) && !Skip) {
1460 unsigned Leading = XLen - llvm::bit_width(C1);
1461 unsigned Trailing = llvm::countr_zero(C1);
1462 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1463 SDNode *SRAI = CurDAG->getMachineNode(
1464 RISCV::SRAI, DL, VT, N0.getOperand(0),
1465 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1466 SDNode *SRLI = CurDAG->getMachineNode(
1467 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1468 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1469 SDNode *SLLI = CurDAG->getMachineNode(
1470 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1471 CurDAG->getTargetConstant(Trailing, DL, VT));
1472 ReplaceNode(Node, SLLI);
1473 return;
1474 }
1475 }
1476 }
1477
1478 // If C1 masks off the upper bits only (but can't be formed as an
1479 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1480 // available.
1481 // Transform (and x, C1)
1482 // -> (<bfextract> x, msb, lsb)
1483 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1484 const unsigned Msb = llvm::bit_width(C1) - 1;
1485 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1486 return;
1487 }
1488
1489 if (tryShrinkShlLogicImm(Node))
1490 return;
1491
1492 break;
1493 }
1494 case ISD::MUL: {
1495 // Special case for calculating (mul (and X, C2), C1) where the full product
1496 // fits in XLen bits. We can shift X left by the number of leading zeros in
1497 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1498 // product has XLen trailing zeros, putting it in the output of MULHU. This
1499 // can avoid materializing a constant in a register for C2.
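// Illustrative example (RV64): (mul (and X, 0xFFF), 0x1234) can be selected
// as (mulhu (slli X, 52), 0x1234000): the full product is shifted left by
// exactly 64 bits, so MULHU returns it directly and 0xFFF is never
// materialized in a register.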
1500
1501 // RHS should be a constant.
1502 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1503 if (!N1C || !N1C->hasOneUse())
1504 break;
1505
1506 // LHS should be an AND with constant.
1507 SDValue N0 = Node->getOperand(0);
1508 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1509 break;
1510
1511 uint64_t C2 = N0.getConstantOperandVal(1);
1512
1513 // Constant should be a mask.
1514 if (!isMask_64(C2))
1515 break;
1516
1517 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1518 // multiple users or the constant is a simm12. This prevents inserting a
1519 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1520 // make it more costly to materialize. Otherwise, using a SLLI might allow
1521 // it to be compressed.
1522 bool IsANDIOrZExt =
1523 isInt<12>(C2) ||
1524 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1525 // With XTHeadBb, we can use TH.EXTU.
1526 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1527 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1528 break;
1529 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1530 // the constant is a simm32.
1531 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1532 // With XTHeadBb, we can use TH.EXTU.
1533 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1534 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1535 break;
1536
1537 // We need to shift left the AND input and C1 by a total of XLen bits.
1538
1539 // How far left do we need to shift the AND input?
1540 unsigned XLen = Subtarget->getXLen();
1541 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1542
1543 // The constant gets shifted by the remaining amount unless that would
1544 // shift bits out.
1545 uint64_t C1 = N1C->getZExtValue();
1546 unsigned ConstantShift = XLen - LeadingZeros;
1547 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1548 break;
1549
1550 uint64_t ShiftedC1 = C1 << ConstantShift;
1551 // If this is RV32, we need to sign extend the constant.
1552 if (XLen == 32)
1553 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1554
1555 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1556 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1557 SDNode *SLLI =
1558 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1559 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1560 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1561 SDValue(SLLI, 0), SDValue(Imm, 0));
1562 ReplaceNode(Node, MULHU);
1563 return;
1564 }
1565 case ISD::LOAD: {
1566 if (tryIndexedLoad(Node))
1567 return;
1568
1569 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1570 // We match post-incrementing load here
1571 LoadSDNode *Load = cast<LoadSDNode>(Node);
1572 if (Load->getAddressingMode() != ISD::POST_INC)
1573 break;
1574
1575 SDValue Chain = Node->getOperand(0);
1576 SDValue Base = Node->getOperand(1);
1577 SDValue Offset = Node->getOperand(2);
1578
1579 bool Simm12 = false;
1580 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1581
1582 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1583 int ConstantVal = ConstantOffset->getSExtValue();
1584 Simm12 = isInt<12>(ConstantVal);
1585 if (Simm12)
1586 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1587 Offset.getValueType());
1588 }
1589
1590 unsigned Opcode = 0;
1591 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1592 case MVT::i8:
1593 if (Simm12 && SignExtend)
1594 Opcode = RISCV::CV_LB_ri_inc;
1595 else if (Simm12 && !SignExtend)
1596 Opcode = RISCV::CV_LBU_ri_inc;
1597 else if (!Simm12 && SignExtend)
1598 Opcode = RISCV::CV_LB_rr_inc;
1599 else
1600 Opcode = RISCV::CV_LBU_rr_inc;
1601 break;
1602 case MVT::i16:
1603 if (Simm12 && SignExtend)
1604 Opcode = RISCV::CV_LH_ri_inc;
1605 else if (Simm12 && !SignExtend)
1606 Opcode = RISCV::CV_LHU_ri_inc;
1607 else if (!Simm12 && SignExtend)
1608 Opcode = RISCV::CV_LH_rr_inc;
1609 else
1610 Opcode = RISCV::CV_LHU_rr_inc;
1611 break;
1612 case MVT::i32:
1613 if (Simm12)
1614 Opcode = RISCV::CV_LW_ri_inc;
1615 else
1616 Opcode = RISCV::CV_LW_rr_inc;
1617 break;
1618 default:
1619 break;
1620 }
1621 if (!Opcode)
1622 break;
1623
1624 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1625 Chain.getSimpleValueType(), Base,
1626 Offset, Chain));
1627 return;
1628 }
1629 break;
1630 }
1631 case ISD::INTRINSIC_WO_CHAIN: {
1632 unsigned IntNo = Node->getConstantOperandVal(0);
1633 switch (IntNo) {
1634 // By default we do not custom select any intrinsic.
1635 default:
1636 break;
1637 case Intrinsic::riscv_vmsgeu:
1638 case Intrinsic::riscv_vmsge: {
1639 SDValue Src1 = Node->getOperand(1);
1640 SDValue Src2 = Node->getOperand(2);
1641 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1642 bool IsCmpConstant = false;
1643 bool IsCmpMinimum = false;
1644 // Only custom select scalar second operand.
1645 if (Src2.getValueType() != XLenVT)
1646 break;
1647 // Small constants are handled with patterns.
1648 int64_t CVal = 0;
1649 MVT Src1VT = Src1.getSimpleValueType();
1650 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1651 IsCmpConstant = true;
1652 CVal = C->getSExtValue();
1653 if (CVal >= -15 && CVal <= 16) {
1654 if (!IsUnsigned || CVal != 0)
1655 break;
1656 IsCmpMinimum = true;
1657 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1658 Src1VT.getScalarSizeInBits())
1659 .getSExtValue()) {
1660 IsCmpMinimum = true;
1661 }
1662 }
1663 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1664 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1665 default:
1666 llvm_unreachable("Unexpected LMUL!");
1667#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1668 case RISCVII::VLMUL::lmulenum: \
1669 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1670 : RISCV::PseudoVMSLT_VX_##suffix; \
1671 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1672 : RISCV::PseudoVMSGT_VX_##suffix; \
1673 break;
1674 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1675 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1676 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1677 CASE_VMSLT_OPCODES(LMUL_1, M1)
1678 CASE_VMSLT_OPCODES(LMUL_2, M2)
1679 CASE_VMSLT_OPCODES(LMUL_4, M4)
1680 CASE_VMSLT_OPCODES(LMUL_8, M8)
1681#undef CASE_VMSLT_OPCODES
1682 }
1683 // Mask operations use the LMUL from the mask type.
1684 switch (RISCVTargetLowering::getLMUL(VT)) {
1685 default:
1686 llvm_unreachable("Unexpected LMUL!");
1687#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1688 case RISCVII::VLMUL::lmulenum: \
1689 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1690 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1691 break;
1692 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1693 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1694 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1695 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1696 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1697 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1698 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1699#undef CASE_VMNAND_VMSET_OPCODES
1700 }
1701 SDValue SEW = CurDAG->getTargetConstant(
1702 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1703 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1704 SDValue VL;
1705 selectVLOp(Node->getOperand(3), VL);
1706
1707 // If vmsge(u) with minimum value, expand it to vmset.
1708 if (IsCmpMinimum) {
1709 ReplaceNode(Node,
1710 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1711 return;
1712 }
1713
1714 if (IsCmpConstant) {
1715 SDValue Imm =
1716 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1717
1718 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1719 {Src1, Imm, VL, SEW}));
1720 return;
1721 }
1722
1723 // Expand to
1724 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
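// Illustrative sketch (hypothetical register names, not from the source): a
// request such as vmsge.vx v8, v16, a0 has no single RVV instruction, so this
// path emits
//   vmslt.vx  v8, v16, a0
//   vmnand.mm v8, v8, v8
// i.e. compute "less than" and then invert the mask.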
1725 SDValue Cmp = SDValue(
1726 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1727 0);
1728 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1729 {Cmp, Cmp, VL, MaskSEW}));
1730 return;
1731 }
1732 case Intrinsic::riscv_vmsgeu_mask:
1733 case Intrinsic::riscv_vmsge_mask: {
1734 SDValue Src1 = Node->getOperand(2);
1735 SDValue Src2 = Node->getOperand(3);
1736 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1737 bool IsCmpConstant = false;
1738 bool IsCmpMinimum = false;
1739 // Only custom select scalar second operand.
1740 if (Src2.getValueType() != XLenVT)
1741 break;
1742 // Small constants are handled with patterns.
1743 MVT Src1VT = Src1.getSimpleValueType();
1744 int64_t CVal = 0;
1745 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1746 IsCmpConstant = true;
1747 CVal = C->getSExtValue();
1748 if (CVal >= -15 && CVal <= 16) {
1749 if (!IsUnsigned || CVal != 0)
1750 break;
1751 IsCmpMinimum = true;
1752 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1753 Src1VT.getScalarSizeInBits())
1754 .getSExtValue()) {
1755 IsCmpMinimum = true;
1756 }
1757 }
1758 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1759 VMOROpcode, VMSGTMaskOpcode;
1760 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1761 default:
1762 llvm_unreachable("Unexpected LMUL!");
1763#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1764 case RISCVII::VLMUL::lmulenum: \
1765 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1766 : RISCV::PseudoVMSLT_VX_##suffix; \
1767 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1768 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1769 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
1770 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
1771 break;
1772 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1773 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1774 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1775 CASE_VMSLT_OPCODES(LMUL_1, M1)
1776 CASE_VMSLT_OPCODES(LMUL_2, M2)
1777 CASE_VMSLT_OPCODES(LMUL_4, M4)
1778 CASE_VMSLT_OPCODES(LMUL_8, M8)
1779#undef CASE_VMSLT_OPCODES
1780 }
1781 // Mask operations use the LMUL from the mask type.
1782 switch (RISCVTargetLowering::getLMUL(VT)) {
1783 default:
1784 llvm_unreachable("Unexpected LMUL!");
1785#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1786 case RISCVII::VLMUL::lmulenum: \
1787 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1788 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1789 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1790 break;
1791 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
1792 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
1793 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
1794 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
1795 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
1796 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
1797 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
1798#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1799 }
1800 SDValue SEW = CurDAG->getTargetConstant(
1801 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1802 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1803 SDValue VL;
1804 selectVLOp(Node->getOperand(5), VL);
1805 SDValue MaskedOff = Node->getOperand(1);
1806 SDValue Mask = Node->getOperand(4);
1807
1808 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
1809 if (IsCmpMinimum) {
1810 // We don't need vmor if the MaskedOff and the Mask are the same
1811 // value.
1812 if (Mask == MaskedOff) {
1813 ReplaceUses(Node, Mask.getNode());
1814 return;
1815 }
1816 ReplaceNode(Node,
1817 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1818 {Mask, MaskedOff, VL, MaskSEW}));
1819 return;
1820 }
1821
1822 // If the MaskedOff value and the Mask are the same value use
1823 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1824 // This avoids needing to copy v0 to vd before starting the next sequence.
1825 if (Mask == MaskedOff) {
1826 SDValue Cmp = SDValue(
1827 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1828 0);
1829 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1830 {Mask, Cmp, VL, MaskSEW}));
1831 return;
1832 }
1833
1834 // Mask needs to be copied to V0.
1835 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1836 RISCV::V0, Mask, SDValue());
1837 SDValue Glue = Chain.getValue(1);
1838 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1839
1840 if (IsCmpConstant) {
1841 SDValue Imm =
1842 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1843
1844 ReplaceNode(Node, CurDAG->getMachineNode(
1845 VMSGTMaskOpcode, DL, VT,
1846 {MaskedOff, Src1, Imm, V0, VL, SEW, Glue}));
1847 return;
1848 }
1849
1850 // Otherwise use
1851 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1852 // The result is mask undisturbed.
1853 // We use the same instructions to emulate mask agnostic behavior, because
1854 // the agnostic result can be either undisturbed or all 1.
1855 SDValue Cmp = SDValue(
1856 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1857 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1858 0);
1859 // vmxor.mm vd, vd, v0 is used to update active value.
1860 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1861 {Cmp, Mask, VL, MaskSEW}));
1862 return;
1863 }
1864 case Intrinsic::riscv_vsetvli:
1865 case Intrinsic::riscv_vsetvlimax:
1866 return selectVSETVLI(Node);
1867 }
1868 break;
1869 }
1870 case ISD::INTRINSIC_W_CHAIN: {
1871 unsigned IntNo = Node->getConstantOperandVal(1);
1872 switch (IntNo) {
1873 // By default we do not custom select any intrinsic.
1874 default:
1875 break;
1876 case Intrinsic::riscv_vlseg2:
1877 case Intrinsic::riscv_vlseg3:
1878 case Intrinsic::riscv_vlseg4:
1879 case Intrinsic::riscv_vlseg5:
1880 case Intrinsic::riscv_vlseg6:
1881 case Intrinsic::riscv_vlseg7:
1882 case Intrinsic::riscv_vlseg8: {
1883 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1884 /*IsStrided*/ false);
1885 return;
1886 }
1887 case Intrinsic::riscv_vlseg2_mask:
1888 case Intrinsic::riscv_vlseg3_mask:
1889 case Intrinsic::riscv_vlseg4_mask:
1890 case Intrinsic::riscv_vlseg5_mask:
1891 case Intrinsic::riscv_vlseg6_mask:
1892 case Intrinsic::riscv_vlseg7_mask:
1893 case Intrinsic::riscv_vlseg8_mask: {
1894 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1895 /*IsStrided*/ false);
1896 return;
1897 }
1898 case Intrinsic::riscv_vlsseg2:
1899 case Intrinsic::riscv_vlsseg3:
1900 case Intrinsic::riscv_vlsseg4:
1901 case Intrinsic::riscv_vlsseg5:
1902 case Intrinsic::riscv_vlsseg6:
1903 case Intrinsic::riscv_vlsseg7:
1904 case Intrinsic::riscv_vlsseg8: {
1905 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1906 /*IsStrided*/ true);
1907 return;
1908 }
1909 case Intrinsic::riscv_vlsseg2_mask:
1910 case Intrinsic::riscv_vlsseg3_mask:
1911 case Intrinsic::riscv_vlsseg4_mask:
1912 case Intrinsic::riscv_vlsseg5_mask:
1913 case Intrinsic::riscv_vlsseg6_mask:
1914 case Intrinsic::riscv_vlsseg7_mask:
1915 case Intrinsic::riscv_vlsseg8_mask: {
1916 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1917 /*IsStrided*/ true);
1918 return;
1919 }
1920 case Intrinsic::riscv_vloxseg2:
1921 case Intrinsic::riscv_vloxseg3:
1922 case Intrinsic::riscv_vloxseg4:
1923 case Intrinsic::riscv_vloxseg5:
1924 case Intrinsic::riscv_vloxseg6:
1925 case Intrinsic::riscv_vloxseg7:
1926 case Intrinsic::riscv_vloxseg8:
1927 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1928 /*IsOrdered*/ true);
1929 return;
1930 case Intrinsic::riscv_vluxseg2:
1931 case Intrinsic::riscv_vluxseg3:
1932 case Intrinsic::riscv_vluxseg4:
1933 case Intrinsic::riscv_vluxseg5:
1934 case Intrinsic::riscv_vluxseg6:
1935 case Intrinsic::riscv_vluxseg7:
1936 case Intrinsic::riscv_vluxseg8:
1937 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1938 /*IsOrdered*/ false);
1939 return;
1940 case Intrinsic::riscv_vloxseg2_mask:
1941 case Intrinsic::riscv_vloxseg3_mask:
1942 case Intrinsic::riscv_vloxseg4_mask:
1943 case Intrinsic::riscv_vloxseg5_mask:
1944 case Intrinsic::riscv_vloxseg6_mask:
1945 case Intrinsic::riscv_vloxseg7_mask:
1946 case Intrinsic::riscv_vloxseg8_mask:
1947 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1948 /*IsOrdered*/ true);
1949 return;
1950 case Intrinsic::riscv_vluxseg2_mask:
1951 case Intrinsic::riscv_vluxseg3_mask:
1952 case Intrinsic::riscv_vluxseg4_mask:
1953 case Intrinsic::riscv_vluxseg5_mask:
1954 case Intrinsic::riscv_vluxseg6_mask:
1955 case Intrinsic::riscv_vluxseg7_mask:
1956 case Intrinsic::riscv_vluxseg8_mask:
1957 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1958 /*IsOrdered*/ false);
1959 return;
1960 case Intrinsic::riscv_vlseg8ff:
1961 case Intrinsic::riscv_vlseg7ff:
1962 case Intrinsic::riscv_vlseg6ff:
1963 case Intrinsic::riscv_vlseg5ff:
1964 case Intrinsic::riscv_vlseg4ff:
1965 case Intrinsic::riscv_vlseg3ff:
1966 case Intrinsic::riscv_vlseg2ff: {
1967 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
1968 return;
1969 }
1970 case Intrinsic::riscv_vlseg8ff_mask:
1971 case Intrinsic::riscv_vlseg7ff_mask:
1972 case Intrinsic::riscv_vlseg6ff_mask:
1973 case Intrinsic::riscv_vlseg5ff_mask:
1974 case Intrinsic::riscv_vlseg4ff_mask:
1975 case Intrinsic::riscv_vlseg3ff_mask:
1976 case Intrinsic::riscv_vlseg2ff_mask: {
1977 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
1978 return;
1979 }
1980 case Intrinsic::riscv_vloxei:
1981 case Intrinsic::riscv_vloxei_mask:
1982 case Intrinsic::riscv_vluxei:
1983 case Intrinsic::riscv_vluxei_mask: {
1984 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1985 IntNo == Intrinsic::riscv_vluxei_mask;
1986 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1987 IntNo == Intrinsic::riscv_vloxei_mask;
1988
1989 MVT VT = Node->getSimpleValueType(0);
1990 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1991
1992 unsigned CurOp = 2;
1993 SmallVector<SDValue, 8> Operands;
1994 Operands.push_back(Node->getOperand(CurOp++));
1995
1996 MVT IndexVT;
1997 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1998 /*IsStridedOrIndexed*/ true, Operands,
1999 /*IsLoad=*/true, &IndexVT);
2000
2002 "Element count mismatch");
2003
2004 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2005 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2006 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2007 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2008 report_fatal_error("The V extension does not support EEW=64 for index "
2009 "values when XLEN=32");
2010 }
2011 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2012 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2013 static_cast<unsigned>(IndexLMUL));
2014 MachineSDNode *Load =
2015 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2016
2017 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2018 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2019
2020 ReplaceNode(Node, Load);
2021 return;
2022 }
2023 case Intrinsic::riscv_vlm:
2024 case Intrinsic::riscv_vle:
2025 case Intrinsic::riscv_vle_mask:
2026 case Intrinsic::riscv_vlse:
2027 case Intrinsic::riscv_vlse_mask: {
2028 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2029 IntNo == Intrinsic::riscv_vlse_mask;
2030 bool IsStrided =
2031 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2032
2033 MVT VT = Node->getSimpleValueType(0);
2034 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2035
2036 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2037 // operand at the IR level. In pseudos, it has both a policy and a
2038 // passthru operand. The passthru operand is needed to track the
2039 // "tail undefined" state, and the policy is there just for
2040 // consistency - it will always be "don't care" for the
2041 // unmasked form.
2042 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2043 unsigned CurOp = 2;
2044 SmallVector<SDValue, 8> Operands;
2045 if (HasPassthruOperand)
2046 Operands.push_back(Node->getOperand(CurOp++));
2047 else {
2048 // We eagerly lower to implicit_def (instead of undef), as we
2049 // otherwise fail to select nodes such as: nxv1i1 = undef
2050 SDNode *Passthru =
2051 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2052 Operands.push_back(SDValue(Passthru, 0));
2053 }
2054 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2055 Operands, /*IsLoad=*/true);
2056
2057 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2058 const RISCV::VLEPseudo *P =
2059 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2060 static_cast<unsigned>(LMUL));
2061 MachineSDNode *Load =
2062 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2063
2064 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2065 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2066
2067 ReplaceNode(Node, Load);
2068 return;
2069 }
2070 case Intrinsic::riscv_vleff:
2071 case Intrinsic::riscv_vleff_mask: {
2072 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2073
2074 MVT VT = Node->getSimpleValueType(0);
2075 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2076
2077 unsigned CurOp = 2;
2078 SmallVector<SDValue, 7> Operands;
2079 Operands.push_back(Node->getOperand(CurOp++));
2080 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2081 /*IsStridedOrIndexed*/ false, Operands,
2082 /*IsLoad=*/true);
2083
2084 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2085 const RISCV::VLEPseudo *P =
2086 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2087 Log2SEW, static_cast<unsigned>(LMUL));
2088 MachineSDNode *Load = CurDAG->getMachineNode(
2089 P->Pseudo, DL, Node->getVTList(), Operands);
2090 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2091 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2092
2093 ReplaceNode(Node, Load);
2094 return;
2095 }
2096 }
2097 break;
2098 }
2099 case ISD::INTRINSIC_VOID: {
2100 unsigned IntNo = Node->getConstantOperandVal(1);
2101 switch (IntNo) {
2102 case Intrinsic::riscv_vsseg2:
2103 case Intrinsic::riscv_vsseg3:
2104 case Intrinsic::riscv_vsseg4:
2105 case Intrinsic::riscv_vsseg5:
2106 case Intrinsic::riscv_vsseg6:
2107 case Intrinsic::riscv_vsseg7:
2108 case Intrinsic::riscv_vsseg8: {
2109 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2110 /*IsStrided*/ false);
2111 return;
2112 }
2113 case Intrinsic::riscv_vsseg2_mask:
2114 case Intrinsic::riscv_vsseg3_mask:
2115 case Intrinsic::riscv_vsseg4_mask:
2116 case Intrinsic::riscv_vsseg5_mask:
2117 case Intrinsic::riscv_vsseg6_mask:
2118 case Intrinsic::riscv_vsseg7_mask:
2119 case Intrinsic::riscv_vsseg8_mask: {
2120 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2121 /*IsStrided*/ false);
2122 return;
2123 }
2124 case Intrinsic::riscv_vssseg2:
2125 case Intrinsic::riscv_vssseg3:
2126 case Intrinsic::riscv_vssseg4:
2127 case Intrinsic::riscv_vssseg5:
2128 case Intrinsic::riscv_vssseg6:
2129 case Intrinsic::riscv_vssseg7:
2130 case Intrinsic::riscv_vssseg8: {
2131 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2132 /*IsStrided*/ true);
2133 return;
2134 }
2135 case Intrinsic::riscv_vssseg2_mask:
2136 case Intrinsic::riscv_vssseg3_mask:
2137 case Intrinsic::riscv_vssseg4_mask:
2138 case Intrinsic::riscv_vssseg5_mask:
2139 case Intrinsic::riscv_vssseg6_mask:
2140 case Intrinsic::riscv_vssseg7_mask:
2141 case Intrinsic::riscv_vssseg8_mask: {
2142 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2143 /*IsStrided*/ true);
2144 return;
2145 }
2146 case Intrinsic::riscv_vsoxseg2:
2147 case Intrinsic::riscv_vsoxseg3:
2148 case Intrinsic::riscv_vsoxseg4:
2149 case Intrinsic::riscv_vsoxseg5:
2150 case Intrinsic::riscv_vsoxseg6:
2151 case Intrinsic::riscv_vsoxseg7:
2152 case Intrinsic::riscv_vsoxseg8:
2153 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2154 /*IsOrdered*/ true);
2155 return;
2156 case Intrinsic::riscv_vsuxseg2:
2157 case Intrinsic::riscv_vsuxseg3:
2158 case Intrinsic::riscv_vsuxseg4:
2159 case Intrinsic::riscv_vsuxseg5:
2160 case Intrinsic::riscv_vsuxseg6:
2161 case Intrinsic::riscv_vsuxseg7:
2162 case Intrinsic::riscv_vsuxseg8:
2163 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2164 /*IsOrdered*/ false);
2165 return;
2166 case Intrinsic::riscv_vsoxseg2_mask:
2167 case Intrinsic::riscv_vsoxseg3_mask:
2168 case Intrinsic::riscv_vsoxseg4_mask:
2169 case Intrinsic::riscv_vsoxseg5_mask:
2170 case Intrinsic::riscv_vsoxseg6_mask:
2171 case Intrinsic::riscv_vsoxseg7_mask:
2172 case Intrinsic::riscv_vsoxseg8_mask:
2173 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2174 /*IsOrdered*/ true);
2175 return;
2176 case Intrinsic::riscv_vsuxseg2_mask:
2177 case Intrinsic::riscv_vsuxseg3_mask:
2178 case Intrinsic::riscv_vsuxseg4_mask:
2179 case Intrinsic::riscv_vsuxseg5_mask:
2180 case Intrinsic::riscv_vsuxseg6_mask:
2181 case Intrinsic::riscv_vsuxseg7_mask:
2182 case Intrinsic::riscv_vsuxseg8_mask:
2183 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2184 /*IsOrdered*/ false);
2185 return;
2186 case Intrinsic::riscv_vsoxei:
2187 case Intrinsic::riscv_vsoxei_mask:
2188 case Intrinsic::riscv_vsuxei:
2189 case Intrinsic::riscv_vsuxei_mask: {
2190 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2191 IntNo == Intrinsic::riscv_vsuxei_mask;
2192 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2193 IntNo == Intrinsic::riscv_vsoxei_mask;
2194
2195 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2196 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2197
2198 unsigned CurOp = 2;
2199 SmallVector<SDValue, 8> Operands;
2200 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2201
2202 MVT IndexVT;
2203 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2204 /*IsStridedOrIndexed*/ true, Operands,
2205 /*IsLoad=*/false, &IndexVT);
2206
2208 "Element count mismatch");
2209
2210 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2211 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2212 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2213 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2214 report_fatal_error("The V extension does not support EEW=64 for index "
2215 "values when XLEN=32");
2216 }
2217 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2218 IsMasked, IsOrdered, IndexLog2EEW,
2219 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2220 MachineSDNode *Store =
2221 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2222
2223 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2224 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2225
2226 ReplaceNode(Node, Store);
2227 return;
2228 }
2229 case Intrinsic::riscv_vsm:
2230 case Intrinsic::riscv_vse:
2231 case Intrinsic::riscv_vse_mask:
2232 case Intrinsic::riscv_vsse:
2233 case Intrinsic::riscv_vsse_mask: {
2234 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2235 IntNo == Intrinsic::riscv_vsse_mask;
2236 bool IsStrided =
2237 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2238
2239 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2240 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2241
2242 unsigned CurOp = 2;
2243 SmallVector<SDValue, 8> Operands;
2244 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2245
2246 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2247 Operands);
2248
2249 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2250 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2251 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2252 MachineSDNode *Store =
2253 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2254 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2255 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2256
2257 ReplaceNode(Node, Store);
2258 return;
2259 }
2260 case Intrinsic::riscv_sf_vc_x_se:
2261 case Intrinsic::riscv_sf_vc_i_se:
2262 selectSF_VC_X_SE(Node);
2263 return;
2264 }
2265 break;
2266 }
2267 case ISD::BITCAST: {
2268 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2269 // Just drop bitcasts between vectors if both are fixed or both are
2270 // scalable.
2271 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2272 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2273 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2274 CurDAG->RemoveDeadNode(Node);
2275 return;
2276 }
2277 break;
2278 }
2279 case ISD::INSERT_SUBVECTOR:
2280 case RISCVISD::TUPLE_INSERT: {
2281 SDValue V = Node->getOperand(0);
2282 SDValue SubV = Node->getOperand(1);
2283 SDLoc DL(SubV);
2284 auto Idx = Node->getConstantOperandVal(2);
2285 MVT SubVecVT = SubV.getSimpleValueType();
2286
2287 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2288 MVT SubVecContainerVT = SubVecVT;
2289 // Establish the correct scalable-vector types for any fixed-length type.
2290 if (SubVecVT.isFixedLengthVector()) {
2291 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2292 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2293 [[maybe_unused]] bool ExactlyVecRegSized =
2294 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2295 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2296 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2297 .getKnownMinValue()));
2298 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2299 }
2300 MVT ContainerVT = VT;
2301 if (VT.isFixedLengthVector())
2302 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2303
2304 const auto *TRI = Subtarget->getRegisterInfo();
2305 unsigned SubRegIdx;
2306 std::tie(SubRegIdx, Idx) =
2307 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2308 ContainerVT, SubVecContainerVT, Idx, TRI);
2309
2310 // If the Idx hasn't been completely eliminated then this is a subvector
2311 // insert which doesn't naturally align to a vector register. These must
2312 // be handled using instructions to manipulate the vector registers.
2313 if (Idx != 0)
2314 break;
2315
2316 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2317 [[maybe_unused]] bool IsSubVecPartReg =
2318 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2319 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2320 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2321 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2322 V.isUndef()) &&
2323 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2324 "the subvector is smaller than a full-sized register");
2325
2326 // If we haven't set a SubRegIdx, then we must be going between
2327 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2328 if (SubRegIdx == RISCV::NoSubRegister) {
2329 unsigned InRegClassID =
2330 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2331 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2332 InRegClassID &&
2333 "Unexpected subvector extraction");
2334 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2335 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2336 DL, VT, SubV, RC);
2337 ReplaceNode(Node, NewNode);
2338 return;
2339 }
2340
2341 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2342 ReplaceNode(Node, Insert.getNode());
2343 return;
2344 }
2345 case ISD::EXTRACT_SUBVECTOR:
2346 case RISCVISD::TUPLE_EXTRACT: {
2347 SDValue V = Node->getOperand(0);
2348 auto Idx = Node->getConstantOperandVal(1);
2349 MVT InVT = V.getSimpleValueType();
2350 SDLoc DL(V);
2351
2352 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2353 MVT SubVecContainerVT = VT;
2354 // Establish the correct scalable-vector types for any fixed-length type.
2355 if (VT.isFixedLengthVector()) {
2356 assert(Idx == 0);
2357 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2358 }
2359 if (InVT.isFixedLengthVector())
2360 InVT = TLI.getContainerForFixedLengthVector(InVT);
2361
2362 const auto *TRI = Subtarget->getRegisterInfo();
2363 unsigned SubRegIdx;
2364 std::tie(SubRegIdx, Idx) =
2365 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2366 InVT, SubVecContainerVT, Idx, TRI);
2367
2368 // If the Idx hasn't been completely eliminated then this is a subvector
2369 // extract which doesn't naturally align to a vector register. These must
2370 // be handled using instructions to manipulate the vector registers.
2371 if (Idx != 0)
2372 break;
2373
2374 // If we haven't set a SubRegIdx, then we must be going between
2375 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2376 if (SubRegIdx == RISCV::NoSubRegister) {
2377 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2378 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2379 InRegClassID &&
2380 "Unexpected subvector extraction");
2381 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2382 SDNode *NewNode =
2383 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2384 ReplaceNode(Node, NewNode);
2385 return;
2386 }
2387
2388 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2389 ReplaceNode(Node, Extract.getNode());
2390 return;
2391 }
2392 case RISCVISD::VMV_S_X_VL:
2393 case RISCVISD::VFMV_S_F_VL:
2394 case RISCVISD::VMV_V_X_VL:
2395 case RISCVISD::VFMV_V_F_VL: {
2396 // Try to match splat of a scalar load to a strided load with stride of x0.
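// For example (illustrative, assumed registers): a splat such as
//   fld fa0, 0(a0)
//   vfmv.v.f v8, fa0
// can instead be selected as the zero-stride load
//   vlse64.v v8, (a0), zero
// avoiding the scalar load and the scalar-to-vector move.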
2397 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2398 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2399 if (!Node->getOperand(0).isUndef())
2400 break;
2401 SDValue Src = Node->getOperand(1);
2402 auto *Ld = dyn_cast<LoadSDNode>(Src);
2403 // Can't fold the load update node because its second
2404 // output is used, so the load update node can't be removed.
2405 if (!Ld || Ld->isIndexed())
2406 break;
2407 EVT MemVT = Ld->getMemoryVT();
2408 // The memory VT should be the same size as the element type.
2409 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2410 break;
2411 if (!IsProfitableToFold(Src, Node, Node) ||
2412 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2413 break;
2414
2415 SDValue VL;
2416 if (IsScalarMove) {
2417 // We could handle more VL values if we updated the VSETVLI insertion pass
2418 // to avoid introducing more VSETVLIs.
2419 if (!isOneConstant(Node->getOperand(2)))
2420 break;
2421 selectVLOp(Node->getOperand(2), VL);
2422 } else
2423 selectVLOp(Node->getOperand(2), VL);
2424
2425 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2426 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2427
2428 // If VL=1, then we don't need to do a strided load and can just do a
2429 // regular load.
2430 bool IsStrided = !isOneConstant(VL);
2431
2432 // Only do a strided load if the subtarget has an optimized zero-stride vector load.
2433 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2434 break;
2435
2436 SmallVector<SDValue> Operands = {
2437 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2438 Ld->getBasePtr()};
2439 if (IsStrided)
2440 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2441 uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
2442 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2443 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2444
2445 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2446 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2447 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2448 Log2SEW, static_cast<unsigned>(LMUL));
2449 MachineSDNode *Load =
2450 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2451 // Update the chain.
2452 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2453 // Record the mem-refs
2454 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2455 // Replace the splat with the vlse.
2456 ReplaceNode(Node, Load);
2457 return;
2458 }
2459 case ISD::PREFETCH:
2460 unsigned Locality = Node->getConstantOperandVal(3);
2461 if (Locality > 2)
2462 break;
2463
2464 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2465 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2466 MMO->setFlags(MachineMemOperand::MONonTemporal);
2467
2468 int NontemporalLevel = 0;
2469 switch (Locality) {
2470 case 0:
2471 NontemporalLevel = 3; // NTL.ALL
2472 break;
2473 case 1:
2474 NontemporalLevel = 1; // NTL.PALL
2475 break;
2476 case 2:
2477 NontemporalLevel = 0; // NTL.P1
2478 break;
2479 default:
2480 llvm_unreachable("unexpected locality value.");
2481 }
2482
2483 if (NontemporalLevel & 0b1)
2484 MMO->setFlags(MONontemporalBit0);
2485 if (NontemporalLevel & 0b10)
2486 MMO->setFlags(MONontemporalBit1);
2487 }
2488 break;
2489 }
2490
2491 // Select the default instruction.
2492 SelectCode(Node);
2493}
2494
2495bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2496 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2497 std::vector<SDValue> &OutOps) {
2498 // Always produce a register and immediate operand, as expected by
2499 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2500 switch (ConstraintID) {
2501 case InlineAsm::ConstraintCode::o:
2502 case InlineAsm::ConstraintCode::m: {
2503 SDValue Op0, Op1;
2504 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2505 assert(Found && "SelectAddrRegImm should always succeed");
2506 OutOps.push_back(Op0);
2507 OutOps.push_back(Op1);
2508 return false;
2509 }
2510 case InlineAsm::ConstraintCode::A:
2511 OutOps.push_back(Op);
2512 OutOps.push_back(
2513 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2514 return false;
2515 default:
2516 report_fatal_error("Unexpected asm memory constraint " +
2517 InlineAsm::getMemConstraintName(ConstraintID));
2518 }
2519
2520 return true;
2521}
2522
2523bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2524 SDValue &Offset) {
2525 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2526 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2527 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2528 return true;
2529 }
2530
2531 return false;
2532}
2533
2534// Select a frame index and an optional immediate offset from an ADD or OR.
2535bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2536 SDValue &Offset) {
2537 if (SelectAddrFrameIndex(Addr, Base, Offset))
2538 return true;
2539
2540 if (!CurDAG->isBaseWithConstantOffset(Addr))
2539
2541 return false;
2542
2543 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2544 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2545 if (isInt<12>(CVal)) {
2546 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2547 Subtarget->getXLenVT());
2548 Offset = CurDAG->getSignedTargetConstant(CVal, SDLoc(Addr),
2549 Subtarget->getXLenVT());
2550 return true;
2551 }
2552 }
2553
2554 return false;
2555}
2556
2557// Fold constant addresses.
2558static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2559 const MVT VT, const RISCVSubtarget *Subtarget,
2560 SDValue Addr, SDValue &Base, SDValue &Offset,
2561 bool IsPrefetch = false,
2562 bool IsRV32Zdinx = false) {
2563 if (!isa<ConstantSDNode>(Addr))
2564 return false;
2565
2566 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2567
2568 // If the constant is a simm12, we can fold the whole constant and use X0 as
2569 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2570 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
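// Worked example (illustrative): for the absolute address 0x12345678,
// Lo12 = 0x678 and Hi20 = 0x12345, so a load can be selected as
//   lui a0, 0x12345
//   lw  a1, 0x678(a0)
// with the low 12 bits folded into the memory access.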
2571 int64_t Lo12 = SignExtend64<12>(CVal);
2572 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2573 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2574 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2575 return false;
2576 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2577 return false;
2578
2579 if (Hi) {
2580 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2581 Base = SDValue(
2582 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2583 CurDAG->getTargetConstant(Hi20, DL, VT)),
2584 0);
2585 } else {
2586 Base = CurDAG->getRegister(RISCV::X0, VT);
2587 }
2588 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2589 return true;
2590 }
2591
2592 // Ask how constant materialization would handle this constant.
2593 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2594
2595 // If the last instruction would be an ADDI, we can fold its immediate and
2596 // emit the rest of the sequence as the base.
2597 if (Seq.back().getOpcode() != RISCV::ADDI)
2598 return false;
2599 Lo12 = Seq.back().getImm();
2600 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2601 return false;
2602 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2603 return false;
2604
2605 // Drop the last instruction.
2606 Seq.pop_back();
2607 assert(!Seq.empty() && "Expected more instructions in sequence");
2608
2609 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2610 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2611 return true;
2612}
2613
2614// Is this ADD instruction only used as the base pointer of scalar loads and
2615// stores?
2616static bool isWorthFoldingAdd(SDValue Add) {
2617 for (auto *User : Add->users()) {
2618 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2619 User->getOpcode() != ISD::ATOMIC_LOAD &&
2620 User->getOpcode() != ISD::ATOMIC_STORE)
2621 return false;
2622 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
2623 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2624 VT != MVT::f64)
2625 return false;
2626 // Don't allow stores of the value. It must be used as the address.
2627 if (User->getOpcode() == ISD::STORE &&
2628 cast<StoreSDNode>(User)->getValue() == Add)
2629 return false;
2630 if (User->getOpcode() == ISD::ATOMIC_STORE &&
2631 cast<AtomicSDNode>(User)->getVal() == Add)
2632 return false;
2633 }
2634
2635 return true;
2636}
2637
2638bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2639 unsigned MaxShiftAmount,
2640 SDValue &Base, SDValue &Index,
2641 SDValue &Scale) {
2642 EVT VT = Addr.getSimpleValueType();
2643 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2644 SDValue &Shift) {
2645 uint64_t ShiftAmt = 0;
2646 Index = N;
2647
2648 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2649 // Only match shifts by a value in range [0, MaxShiftAmount].
2650 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2651 Index = N.getOperand(0);
2652 ShiftAmt = N.getConstantOperandVal(1);
2653 }
2654 }
2655
2656 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2657 return ShiftAmt != 0;
2658 };
2659
2660 if (Addr.getOpcode() == ISD::ADD) {
2661 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2662 SDValue AddrB = Addr.getOperand(0);
2663 if (AddrB.getOpcode() == ISD::ADD &&
2664 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2665 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2666 isInt<12>(C1->getSExtValue())) {
2667 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2668 SDValue C1Val =
2669 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2670 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2671 AddrB.getOperand(1), C1Val),
2672 0);
2673 return true;
2674 }
2675 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2676 Base = Addr.getOperand(1);
2677 return true;
2678 } else {
2679 UnwrapShl(Addr.getOperand(1), Index, Scale);
2680 Base = Addr.getOperand(0);
2681 return true;
2682 }
2683 } else if (UnwrapShl(Addr, Index, Scale)) {
2684 EVT VT = Addr.getValueType();
2685 Base = CurDAG->getRegister(RISCV::X0, VT);
2686 return true;
2687 }
2688
2689 return false;
2690}
2691
2692bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2693 SDValue &Offset, bool IsRV32Zdinx) {
2694 if (SelectAddrFrameIndex(Addr, Base, Offset))
2695 return true;
2696
2697 SDLoc DL(Addr);
2698 MVT VT = Addr.getSimpleValueType();
2699
2700 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2701 // If this is non RV32Zdinx we can always fold.
2702 if (!IsRV32Zdinx) {
2703 Base = Addr.getOperand(0);
2704 Offset = Addr.getOperand(1);
2705 return true;
2706 }
2707
2708 // For RV32Zdinx we need to have more than 4 byte alignment so we can add 4
2709 // to the offset when we expand in RISCVExpandPseudoInsts.
2710 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
2711 const DataLayout &DL = CurDAG->getDataLayout();
2712 Align Alignment = commonAlignment(
2713 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2714 if (Alignment > 4) {
2715 Base = Addr.getOperand(0);
2716 Offset = Addr.getOperand(1);
2717 return true;
2718 }
2719 }
2720 if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) {
2721 Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
2722 if (Alignment > 4) {
2723 Base = Addr.getOperand(0);
2724 Offset = Addr.getOperand(1);
2725 return true;
2726 }
2727 }
2728 }
2729
2730 int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0;
2731 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2732 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2733 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2734 Base = Addr.getOperand(0);
2735 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2736 SDValue LoOperand = Base.getOperand(1);
2737 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2738 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2739 // (its low part, really), then we can rely on the alignment of that
2740 // variable to provide a margin of safety before the low part can overflow
2741 // the 12 bits of the load/store offset. Check if CVal falls within
2742 // that margin; if so (low part + CVal) can't overflow.
2743 const DataLayout &DL = CurDAG->getDataLayout();
2744 Align Alignment = commonAlignment(
2745 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2746 if ((CVal == 0 || Alignment > CVal) &&
2747 (!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) {
2748 int64_t CombinedOffset = CVal + GA->getOffset();
2749 Base = Base.getOperand(0);
2750 Offset = CurDAG->getTargetGlobalAddress(
2751 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2752 CombinedOffset, GA->getTargetFlags());
2753 return true;
2754 }
2755 }
2756 }
2757
2758 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2759 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2760 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2761 return true;
2762 }
2763 }
2764
2765 // Handle ADD with large immediates.
2766 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2767 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2768 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2769 "simm12 not already handled?");
2770
2771 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2772 // an ADDI for part of the offset and fold the rest into the load/store.
2773 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
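// For example (illustrative): a load at offset 3000 from a0 does not fit a
// simm12, but 3000 = 2047 + 953 allows
//   addi a1, a0, 2047
//   lw   a2, 953(a1)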
2774 if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) {
2775 int64_t Adj = CVal < 0 ? -2048 : 2047;
2776 Base = SDValue(
2777 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2778 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
2779 0);
2780 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
2781 return true;
2782 }
2783
2784 // For larger immediates, we might be able to save one instruction from
2785 // constant materialization by folding the Lo12 bits of the immediate into
2786 // the address. We should only do this if the ADD is only used by loads and
2787 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2788 // separately with the full materialized immediate creating extra
2789 // instructions.
2790 if (isWorthFoldingAdd(Addr) &&
2791 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2792 Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) {
2793 // Insert an ADD instruction with the materialized Hi52 bits.
2794 Base = SDValue(
2795 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2796 0);
2797 return true;
2798 }
2799 }
2800
2801 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2802 /*IsPrefetch=*/false, RV32ZdinxRange))
2803 return true;
2804
2805 Base = Addr;
2806 Offset = CurDAG->getTargetConstant(0, DL, VT);
2807 return true;
2808}
2809
2810/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2811/// Offset should be all zeros.
2812bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2813 SDValue &Offset) {
2814 if (SelectAddrFrameIndex(Addr, Base, Offset))
2815 return true;
2816
2817 SDLoc DL(Addr);
2818 MVT VT = Addr.getSimpleValueType();
2819
2820 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2821 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2822 if (isInt<12>(CVal)) {
2823 Base = Addr.getOperand(0);
2824
2825 // Early-out if not a valid offset.
2826 if ((CVal & 0b11111) != 0) {
2827 Base = Addr;
2828 Offset = CurDAG->getTargetConstant(0, DL, VT);
2829 return true;
2830 }
2831
2832 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2833 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2834 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2835 return true;
2836 }
2837 }
2838
2839 // Handle ADD with large immediates.
2840 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2841 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2842 assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2843 "simm12 not already handled?");
2844
2845 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2846 // one instruction by folding an adjustment (-2048 or 2016) into the address.
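// For example (illustrative): a prefetch at offset 2100 from a0 can become
//   addi a1, a0, 84
//   prefetch.r 2016(a1)
// since 2100 = 84 + 2016 and 2016 has its low 5 bits clear, as the Zicbop
// prefetch encodings require.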
2847 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2848 int64_t Adj = CVal < 0 ? -2048 : 2016;
2849 int64_t AdjustedOffset = CVal - Adj;
2850 Base =
2852 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2853 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
2854 0);
2855 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
2856 return true;
2857 }
2858
2859 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2860 Offset, /*IsPrefetch=*/true)) {
2861 // Insert an ADD instruction with the materialized Hi52 bits.
2862 Base = SDValue(
2863 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2864 0);
2865 return true;
2866 }
2867 }
2868
2869 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2870 /*IsPrefetch=*/true))
2871 return true;
2872
2873 Base = Addr;
2874 Offset = CurDAG->getTargetConstant(0, DL, VT);
2875 return true;
2876}
2877
2878bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2879 SDValue &Offset) {
2880 if (Addr.getOpcode() != ISD::ADD)
2881 return false;
2882
2883 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2884 return false;
2885
2886 Base = Addr.getOperand(1);
2887 Offset = Addr.getOperand(0);
2888 return true;
2889}
2890
2891bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2892 SDValue &ShAmt) {
2893 ShAmt = N;
2894
2895 // Peek through zext.
2896 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2897 ShAmt = ShAmt.getOperand(0);
2898
2899 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2900 // amount. If there is an AND on the shift amount, we can bypass it if it
2901 // doesn't affect any of those bits.
2902 if (ShAmt.getOpcode() == ISD::AND &&
2903 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2904 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2905
2906 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2907 // mask that covers the bits needed to represent all shift amounts.
2908 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2909 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2910
2911 if (ShMask.isSubsetOf(AndMask)) {
2912 ShAmt = ShAmt.getOperand(0);
2913 } else {
2914 // SimplifyDemandedBits may have optimized the mask so try restoring any
2915 // bits that are known zero.
2916 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2917 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2918 return true;
2919 ShAmt = ShAmt.getOperand(0);
2920 }
2921 }
2922
2923 if (ShAmt.getOpcode() == ISD::ADD &&
2924 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2925 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2926 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2927 // to avoid the ADD.
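// For example (illustrative), on RV64 a shift amount of (add Y, 64) can be
// replaced by Y: SLL/SRL/SRA read only the low 6 bits of the amount, and
// adding 64 does not change them.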
2928 if (Imm != 0 && Imm % ShiftWidth == 0) {
2929 ShAmt = ShAmt.getOperand(0);
2930 return true;
2931 }
2932 } else if (ShAmt.getOpcode() == ISD::SUB &&
2933 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2934 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2935 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2936 // generate a NEG instead of a SUB of a constant.
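// For example (illustrative), on RV64 a shift amount of (sub 64, Y) can be
// replaced by (neg Y), e.g. "subw t0, zero, a1", because -Y == 64 - Y
// modulo 64 and only the low 6 bits of the amount are read.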
2937 if (Imm != 0 && Imm % ShiftWidth == 0) {
2938 SDLoc DL(ShAmt);
2939 EVT VT = ShAmt.getValueType();
2940 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2941 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2942 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2943 ShAmt.getOperand(1));
2944 ShAmt = SDValue(Neg, 0);
2945 return true;
2946 }
2947 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2948 // to generate a NOT instead of a SUB of a constant.
2949 if (Imm % ShiftWidth == ShiftWidth - 1) {
2950 SDLoc DL(ShAmt);
2951 EVT VT = ShAmt.getValueType();
2952 MachineSDNode *Not = CurDAG->getMachineNode(
2953 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2954 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
2955 ShAmt = SDValue(Not, 0);
2956 return true;
2957 }
2958 }
2959
2960 return true;
2961}
2962
2963/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2964/// check for equality with 0. This function emits instructions that convert the
2965/// seteq/setne into something that can be compared with 0.
2966/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2967/// ISD::SETNE).
2968bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2969 SDValue &Val) {
2970 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2971 "Unexpected condition code!");
2972
2973 // We're looking for a setcc.
2974 if (N->getOpcode() != ISD::SETCC)
2975 return false;
2976
2977 // Must be an equality comparison.
2978 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2979 if (CCVal != ExpectedCCVal)
2980 return false;
2981
2982 SDValue LHS = N->getOperand(0);
2983 SDValue RHS = N->getOperand(1);
2984
2985 if (!LHS.getValueType().isScalarInteger())
2986 return false;
2987
2988 // If the RHS is 0, we don't need any extra instructions; return the LHS.
2989 if (isNullConstant(RHS)) {
2990 Val = LHS;
2991 return true;
2992 }
2993
2994 SDLoc DL(N);
2995
2996 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2997 int64_t CVal = C->getSExtValue();
2998 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2999 // non-zero otherwise.
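// For example (illustrative): (seteq X, -2048) can be selected as
//   xori a1, a0, -2048
// whose result is zero exactly when X == -2048.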
3000 if (CVal == -2048) {
3001 Val = SDValue(
3002 CurDAG->getMachineNode(
3003 RISCV::XORI, DL, N->getValueType(0), LHS,
3004 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3005 0);
3006 return true;
3007 }
3008 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
3009 // LHS is equal to the RHS and non-zero otherwise.
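// For example (illustrative): (seteq X, 5) can be selected as
//   addi a1, a0, -5
// whose result is zero exactly when X == 5.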
3010 if (isInt<12>(CVal) || CVal == 2048) {
3011 Val = SDValue(
3012 CurDAG->getMachineNode(
3013 RISCV::ADDI, DL, N->getValueType(0), LHS,
3014 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3015 0);
3016 return true;
3017 }
3018 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3019 Val = SDValue(
3020 CurDAG->getMachineNode(
3021 RISCV::BINVI, DL, N->getValueType(0), LHS,
3022 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3023 0);
3024 return true;
3025 }
3026 }
3027
3028 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3029 // equal and a non-zero value if they aren't.
3030 Val = SDValue(
3031 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3032 return true;
3033}
3034
3035bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3036 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3037 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3038 Val = N.getOperand(0);
3039 return true;
3040 }
3041
3042 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3043 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3044 return N;
3045
3046 SDValue N0 = N.getOperand(0);
3047 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3048 N.getConstantOperandVal(1) == ShiftAmt &&
3049 N0.getConstantOperandVal(1) == ShiftAmt)
3050 return N0.getOperand(0);
3051
3052 return N;
3053 };
3054
3055 MVT VT = N.getSimpleValueType();
3056 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3057 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3058 return true;
3059 }
3060
3061 return false;
3062}
3063
3064bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3065 if (N.getOpcode() == ISD::AND) {
3066 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3067 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3068 Val = N.getOperand(0);
3069 return true;
3070 }
3071 }
3072 MVT VT = N.getSimpleValueType();
3073 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3074 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3075 Val = N;
3076 return true;
3077 }
3078
3079 return false;
3080}
3081
3082/// Look for various patterns that can be done with a SHL that can be folded
3083/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3084/// SHXADD we are trying to match.
3085bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3086 SDValue &Val) {
3087 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3088 SDValue N0 = N.getOperand(0);
3089
3090 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3091 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3092 isa<ConstantSDNode>(N0.getOperand(1))) {
3093 uint64_t Mask = N.getConstantOperandVal(1);
3094 unsigned C2 = N0.getConstantOperandVal(1);
3095
3096 unsigned XLen = Subtarget->getXLen();
3097 if (LeftShift)
3098 Mask &= maskTrailingZeros<uint64_t>(C2);
3099 else
3100 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3101
3102 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3103 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3104 // followed by a SHXADD with c3 for the X amount.
3105 if (isShiftedMask_64(Mask)) {
3106 unsigned Leading = XLen - llvm::bit_width(Mask);
3107 unsigned Trailing = llvm::countr_zero(Mask);
3108 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
3109 SDLoc DL(N);
3110 EVT VT = N.getValueType();
3111 Val = SDValue(CurDAG->getMachineNode(
3112 RISCV::SRLI, DL, VT, N0.getOperand(0),
3113 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
3114 0);
3115 return true;
3116 }
3117 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3118 // leading zeros and c3 trailing zeros. We can use an SRLI by C3
3119 // followed by a SHXADD using c3 for the X amount.
3120 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
3121 SDLoc DL(N);
3122 EVT VT = N.getValueType();
3123 Val = SDValue(
3124 CurDAG->getMachineNode(
3125 RISCV::SRLI, DL, VT, N0.getOperand(0),
3126 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
3127 0);
3128 return true;
3129 }
3130 }
3131 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3132 isa<ConstantSDNode>(N0.getOperand(1))) {
3133 uint64_t Mask = N.getConstantOperandVal(1);
3134 unsigned C2 = N0.getConstantOperandVal(1);
3135
3136 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3137 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3138 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3139 // the X amount.
3140 if (isShiftedMask_64(Mask)) {
3141 unsigned XLen = Subtarget->getXLen();
3142 unsigned Leading = XLen - llvm::bit_width(Mask);
3143 unsigned Trailing = llvm::countr_zero(Mask);
3144 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3145 SDLoc DL(N);
3146 EVT VT = N.getValueType();
3147 Val = SDValue(CurDAG->getMachineNode(
3148 RISCV::SRAI, DL, VT, N0.getOperand(0),
3149 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3150 0);
3151 Val = SDValue(CurDAG->getMachineNode(
3152 RISCV::SRLI, DL, VT, Val,
3153 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3154 0);
3155 return true;
3156 }
3157 }
3158 }
3159 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3160 (LeftShift || N.getOpcode() == ISD::SRL) &&
3161 isa<ConstantSDNode>(N.getOperand(1))) {
3162 SDValue N0 = N.getOperand(0);
3163 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3164 isa<ConstantSDNode>(N0.getOperand(1))) {
3165 uint64_t Mask = N0.getConstantOperandVal(1);
3166 if (isShiftedMask_64(Mask)) {
3167 unsigned C1 = N.getConstantOperandVal(1);
3168 unsigned XLen = Subtarget->getXLen();
3169 unsigned Leading = XLen - llvm::bit_width(Mask);
3170 unsigned Trailing = llvm::countr_zero(Mask);
3171 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3172 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3173 if (LeftShift && Leading == 32 && Trailing > 0 &&
3174 (Trailing + C1) == ShAmt) {
3175 SDLoc DL(N);
3176 EVT VT = N.getValueType();
3177 Val = SDValue(CurDAG->getMachineNode(
3178 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3179 CurDAG->getTargetConstant(Trailing, DL, VT)),
3180 0);
3181 return true;
3182 }
3183 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3184 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3185 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3186 (Trailing - C1) == ShAmt) {
3187 SDLoc DL(N);
3188 EVT VT = N.getValueType();
3189 Val = SDValue(CurDAG->getMachineNode(
3190 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3191 CurDAG->getTargetConstant(Trailing, DL, VT)),
3192 0);
3193 return true;
3194 }
3195 }
3196 }
3197 }
3198
3199 return false;
3200}
3201
3202/// Look for various patterns that can be done with a SHL that can be folded
3203/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3204/// SHXADD_UW we are trying to match.
3205bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3206 SDValue &Val) {
3207 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3208 N.hasOneUse()) {
3209 SDValue N0 = N.getOperand(0);
3210 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3211 N0.hasOneUse()) {
3212 uint64_t Mask = N.getConstantOperandVal(1);
3213 unsigned C2 = N0.getConstantOperandVal(1);
3214
3215 Mask &= maskTrailingZeros<uint64_t>(C2);
3216
3217 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3218 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3219 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3220 if (isShiftedMask_64(Mask)) {
3221 unsigned Leading = llvm::countl_zero(Mask);
3222 unsigned Trailing = llvm::countr_zero(Mask);
3223 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3224 SDLoc DL(N);
3225 EVT VT = N.getValueType();
3226 Val = SDValue(CurDAG->getMachineNode(
3227 RISCV::SLLI, DL, VT, N0.getOperand(0),
3228 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3229 0);
3230 return true;
3231 }
3232 }
3233 }
3234 }
3235
3236 return false;
3237}
3238
3239bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3240 if (!isa<ConstantSDNode>(N))
3241 return false;
3242
3243 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3244 if ((Imm & 0xfff) != 0xfff || Imm == -1)
3245 return false;
3246
3247 for (const SDNode *U : N->users()) {
3248 if (!ISD::isBitwiseLogicOp(U->getOpcode()))
3249 return false;
3250 }
3251
3252 // For 32-bit signed constants we already know it's a win: LUI+ADDI vs LUI.
3253 // For 64-bit constants, the instruction sequences get complex,
3254 // so we select inverted only if it's cheaper.
3255 if (!isInt<32>(Imm)) {
3256 int OrigImmCost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3257 /*CompressionCost=*/true);
3258 int NegImmCost = RISCVMatInt::getIntMatCost(APInt(64, ~Imm), 64, *Subtarget,
3259 /*CompressionCost=*/true);
3260 if (OrigImmCost <= NegImmCost)
3261 return false;
3262 }
3263
3264 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3265 return true;
3266}
3267
3268static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3269 unsigned Bits,
3270 const TargetInstrInfo *TII) {
3271 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3272
3273 if (!MCOpcode)
3274 return false;
3275
3276 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3277 const uint64_t TSFlags = MCID.TSFlags;
3278 if (!RISCVII::hasSEWOp(TSFlags))
3279 return false;
3280 assert(RISCVII::hasVLOp(TSFlags));
3281
3282 bool HasGlueOp = User->getGluedNode() != nullptr;
3283 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3284 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3285 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3286 unsigned VLIdx =
3287 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3288 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3289
3290 if (UserOpNo == VLIdx)
3291 return false;
3292
3293 auto NumDemandedBits =
3294 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3295 return NumDemandedBits && Bits >= *NumDemandedBits;
3296}
3297
3298// Return true if all users of this SDNode* only consume the lower \p Bits.
3299// This can be used to form W instructions for add/sub/mul/shl even when the
3300// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3301// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3302// don't. The sext_inreg+add/sub/mul/shl will get selected as a W instruction,
3303// but the plain add/sub/mul/shl would still become a non-W instruction. By checking the users we
3304// may be able to use a W instruction and CSE with the other instruction if
3305// this has happened. We could try to detect that the CSE opportunity exists
3306// before doing this, but that would be more complicated.
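// For example (illustrative): if an i64 (add X, Y) is consumed only by ADDW
// and SLLIW machine nodes, every user reads just the low 32 bits, so the add
// itself can safely be selected as ADDW and may then CSE with an existing
// ADDW.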
3307bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3308 const unsigned Depth) const {
3309 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3310 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3311 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3312 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3313 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3314 isa<ConstantSDNode>(Node) || Depth != 0) &&
3315 "Unexpected opcode");
3316
3317  if (Depth >= SelectionDAG::MaxRecursionDepth)
3318    return false;
3319
3320 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3321 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3322 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3323 return false;
3324
3325 for (SDUse &Use : Node->uses()) {
3326 SDNode *User = Use.getUser();
3327 // Users of this node should have already been instruction selected
3328 if (!User->isMachineOpcode())
3329 return false;
3330
3331 // TODO: Add more opcodes?
3332 switch (User->getMachineOpcode()) {
3333 default:
3334      if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
3335        break;
3336 return false;
3337 case RISCV::ADDW:
3338 case RISCV::ADDIW:
3339 case RISCV::SUBW:
3340 case RISCV::MULW:
3341 case RISCV::SLLW:
3342 case RISCV::SLLIW:
3343 case RISCV::SRAW:
3344 case RISCV::SRAIW:
3345 case RISCV::SRLW:
3346 case RISCV::SRLIW:
3347 case RISCV::DIVW:
3348 case RISCV::DIVUW:
3349 case RISCV::REMW:
3350 case RISCV::REMUW:
3351 case RISCV::ROLW:
3352 case RISCV::RORW:
3353 case RISCV::RORIW:
3354 case RISCV::CLZW:
3355 case RISCV::CTZW:
3356 case RISCV::CPOPW:
3357 case RISCV::SLLI_UW:
3358 case RISCV::FMV_W_X:
3359 case RISCV::FCVT_H_W:
3360 case RISCV::FCVT_H_W_INX:
3361 case RISCV::FCVT_H_WU:
3362 case RISCV::FCVT_H_WU_INX:
3363 case RISCV::FCVT_S_W:
3364 case RISCV::FCVT_S_W_INX:
3365 case RISCV::FCVT_S_WU:
3366 case RISCV::FCVT_S_WU_INX:
3367 case RISCV::FCVT_D_W:
3368 case RISCV::FCVT_D_W_INX:
3369 case RISCV::FCVT_D_WU:
3370 case RISCV::FCVT_D_WU_INX:
3371 case RISCV::TH_REVW:
3372 case RISCV::TH_SRRIW:
3373 if (Bits >= 32)
3374 break;
3375 return false;
3376 case RISCV::SLL:
3377 case RISCV::SRA:
3378 case RISCV::SRL:
3379 case RISCV::ROL:
3380 case RISCV::ROR:
3381 case RISCV::BSET:
3382 case RISCV::BCLR:
3383 case RISCV::BINV:
3384 // Shift amount operands only use log2(Xlen) bits.
3385 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3386 break;
3387 return false;
3388 case RISCV::SLLI:
3389 // SLLI only uses the lower (XLen - ShAmt) bits.
3390 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3391 break;
3392 return false;
3393 case RISCV::ANDI:
3394 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3395 break;
3396 goto RecCheck;
3397 case RISCV::ORI: {
3398 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3399 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3400 break;
3401 [[fallthrough]];
3402 }
3403 case RISCV::AND:
3404 case RISCV::OR:
3405 case RISCV::XOR:
3406 case RISCV::XORI:
3407 case RISCV::ANDN:
3408 case RISCV::ORN:
3409 case RISCV::XNOR:
3410 case RISCV::SH1ADD:
3411 case RISCV::SH2ADD:
3412 case RISCV::SH3ADD:
3413 RecCheck:
3414 if (hasAllNBitUsers(User, Bits, Depth + 1))
3415 break;
3416 return false;
3417 case RISCV::SRLI: {
3418 unsigned ShAmt = User->getConstantOperandVal(1);
3419 // If we are shifting right by less than Bits, and users don't demand any
3420 // bits that were shifted into [Bits-1:0], then we can consider this as an
3421 // N-Bit user.
3422 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3423 break;
3424 return false;
3425 }
3426 case RISCV::SEXT_B:
3427 case RISCV::PACKH:
3428 if (Bits >= 8)
3429 break;
3430 return false;
3431 case RISCV::SEXT_H:
3432 case RISCV::FMV_H_X:
3433 case RISCV::ZEXT_H_RV32:
3434 case RISCV::ZEXT_H_RV64:
3435 case RISCV::PACKW:
3436 if (Bits >= 16)
3437 break;
3438 return false;
3439 case RISCV::PACK:
3440 if (Bits >= (Subtarget->getXLen() / 2))
3441 break;
3442 return false;
3443 case RISCV::ADD_UW:
3444 case RISCV::SH1ADD_UW:
3445 case RISCV::SH2ADD_UW:
3446 case RISCV::SH3ADD_UW:
3447 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3448 // 32 bits.
3449 if (Use.getOperandNo() == 0 && Bits >= 32)
3450 break;
3451 return false;
3452 case RISCV::SB:
3453 if (Use.getOperandNo() == 0 && Bits >= 8)
3454 break;
3455 return false;
3456 case RISCV::SH:
3457 if (Use.getOperandNo() == 0 && Bits >= 16)
3458 break;
3459 return false;
3460 case RISCV::SW:
3461 if (Use.getOperandNo() == 0 && Bits >= 32)
3462 break;
3463 return false;
3464 }
3465 }
3466
3467 return true;
3468}
3469
3470// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3471bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3472                                        SDValue &Shl2) {
3473 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3474 int64_t Offset = C->getSExtValue();
3475 unsigned Shift;
3476 for (Shift = 0; Shift < 4; Shift++)
3477 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3478 break;
3479
3480 // Constant cannot be encoded.
3481 if (Shift == 4)
3482 return false;
3483
3484 EVT Ty = N->getValueType(0);
3485 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3486 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3487 return true;
3488 }
3489
3490 return false;
3491}
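// Worked example (values chosen here): Offset == -48 fails isInt<5> at
// Shift == 0 and 1, but -48 >> 2 == -12 fits in a simm5 and -48 is a multiple
// of 4, so it is encoded as Simm5 == -12 with Shl2 == 2. Offset == 33 is
// rejected: no shift in 0..3 both divides it evenly and leaves a simm5.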
3492
3493// Select VL as a 5 bit immediate or a value that will become a register. This
3494// allows us to choose between VSETIVLI or VSETVLI later.
3495bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3496 auto *C = dyn_cast<ConstantSDNode>(N);
3497 if (C && isUInt<5>(C->getZExtValue())) {
3498 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3499 N->getValueType(0));
3500 } else if (C && C->isAllOnes()) {
3501 // Treat all ones as VLMax.
3502    VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3503                                         N->getValueType(0));
3504 } else if (isa<RegisterSDNode>(N) &&
3505 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3506 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3507 // as the register class. Convert X0 to a special immediate to pass the
3508 // MachineVerifier. This is recognized specially by the vsetvli insertion
3509 // pass.
3510    VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3511                                         N->getValueType(0));
3512 } else {
3513 VL = N;
3514 }
3515
3516 return true;
3517}
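// For example (illustrative): a constant AVL of 8 is kept as a uimm5 target
// constant so a VSETIVLI can be formed later; an all-ones constant or an
// explicit X0 becomes the VLMaxSentinel immediate (meaning VLMAX); any other
// value is left as a register operand and will end up in a VSETVLI.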
3518
3519static SDValue findVSplat(SDValue N) {
3520  if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3521 if (!N.getOperand(0).isUndef())
3522 return SDValue();
3523 N = N.getOperand(1);
3524 }
3525 SDValue Splat = N;
3526 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3527 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3528 !Splat.getOperand(0).isUndef())
3529 return SDValue();
3530 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3531 return Splat;
3532}
3533
3534bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3535  SDValue Splat = findVSplat(N);
3536  if (!Splat)
3537 return false;
3538
3539 SplatVal = Splat.getOperand(1);
3540 return true;
3541}
3542
3543static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3544                                  SelectionDAG &DAG,
3545 const RISCVSubtarget &Subtarget,
3546 std::function<bool(int64_t)> ValidateImm) {
3547  SDValue Splat = findVSplat(N);
3548  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3549 return false;
3550
3551 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3552 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3553 "Unexpected splat operand type");
3554
3555 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3556 // type is wider than the resulting vector element type: an implicit
3557 // truncation first takes place. Therefore, perform a manual
3558 // truncation/sign-extension in order to ignore any truncated bits and catch
3559 // any zero-extended immediate.
3560 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3561 // sign-extending to (XLenVT -1).
3562 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3563
3564 int64_t SplatImm = SplatConst.getSExtValue();
3565
3566 if (!ValidateImm(SplatImm))
3567 return false;
3568
3569 SplatVal =
3570 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3571 return true;
3572}
3573
3574bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3575  return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3576 [](int64_t Imm) { return isInt<5>(Imm); });
3577}
3578
3579bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3580  return selectVSplatImmHelper(
3581 N, SplatVal, *CurDAG, *Subtarget,
3582 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3583}
3584
3585bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3586                                                      SDValue &SplatVal) {
3587 return selectVSplatImmHelper(
3588 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3589 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3590 });
3591}
3592
3593bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3594                                         SDValue &SplatVal) {
3595 return selectVSplatImmHelper(
3596 N, SplatVal, *CurDAG, *Subtarget,
3597 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3598}
3599
3600bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3601  auto IsExtOrTrunc = [](SDValue N) {
3602 switch (N->getOpcode()) {
3603 case ISD::SIGN_EXTEND:
3604 case ISD::ZERO_EXTEND:
3605 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3606 // inactive elements will be undef.
3607    case RISCVISD::TRUNCATE_VECTOR_VL:
3608    case RISCVISD::VSEXT_VL:
3609 case RISCVISD::VZEXT_VL:
3610 return true;
3611 default:
3612 return false;
3613 }
3614 };
3615
3616 // We can have multiple nested nodes, so unravel them all if needed.
3617 while (IsExtOrTrunc(N)) {
3618 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3619 return false;
3620 N = N->getOperand(0);
3621 }
3622
3623 return selectVSplat(N, SplatVal);
3624}
3625
3626bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
3627  // Allow bitcasts from XLenVT -> FP.
3628 if (N.getOpcode() == ISD::BITCAST &&
3629 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
3630 Imm = N.getOperand(0);
3631 return true;
3632 }
3633 // Allow moves from XLenVT to FP.
3634 if (N.getOpcode() == RISCVISD::FMV_H_X ||
3635 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
3636 Imm = N.getOperand(0);
3637 return true;
3638 }
3639
3640  // Otherwise, look for FP constants that can be materialized with a scalar int.
3641 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3642 if (!CFP)
3643 return false;
3644 const APFloat &APF = CFP->getValueAPF();
3645 // td can handle +0.0 already.
3646 if (APF.isPosZero())
3647 return false;
3648
3649 MVT VT = CFP->getSimpleValueType(0);
3650
3651 MVT XLenVT = Subtarget->getXLenVT();
3652 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3653 assert(APF.isNegZero() && "Unexpected constant.");
3654 return false;
3655 }
3656 SDLoc DL(N);
3657 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3658 *Subtarget);
3659 return true;
3660}
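// Illustrative example: the f32 constant -0.0 has the bit pattern 0x80000000,
// which selectImm can build with a single LUI on RV64; the calling pattern can
// then use that integer directly (for instance to splat it into a vector)
// instead of loading the constant from memory.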
3661
3662bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3663                                       SDValue &Imm) {
3664 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3665 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3666
3667 if (!isInt<5>(ImmVal))
3668 return false;
3669
3670 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
3671 Subtarget->getXLenVT());
3672 return true;
3673 }
3674
3675 return false;
3676}
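// Worked example (values chosen here): with Width == 8, the constant 0xF0
// sign-extends to -16, which fits in a simm5 and is returned as -16, while
// 0x70 sign-extends to 112 and is rejected.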
3677
3678// Try to remove sext.w if the input is a W instruction or can be made into
3679// a W instruction cheaply.
3680bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3681 // Look for the sext.w pattern, addiw rd, rs1, 0.
3682 if (N->getMachineOpcode() != RISCV::ADDIW ||
3683 !isNullConstant(N->getOperand(1)))
3684 return false;
3685
3686 SDValue N0 = N->getOperand(0);
3687 if (!N0.isMachineOpcode())
3688 return false;
3689
3690 switch (N0.getMachineOpcode()) {
3691 default:
3692 break;
3693 case RISCV::ADD:
3694 case RISCV::ADDI:
3695 case RISCV::SUB:
3696 case RISCV::MUL:
3697 case RISCV::SLLI: {
3698 // Convert sext.w+add/sub/mul to their W instructions. This will create
3699 // a new independent instruction. This improves latency.
3700 unsigned Opc;
3701 switch (N0.getMachineOpcode()) {
3702 default:
3703 llvm_unreachable("Unexpected opcode!");
3704 case RISCV::ADD: Opc = RISCV::ADDW; break;
3705 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3706 case RISCV::SUB: Opc = RISCV::SUBW; break;
3707 case RISCV::MUL: Opc = RISCV::MULW; break;
3708 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3709 }
3710
3711 SDValue N00 = N0.getOperand(0);
3712 SDValue N01 = N0.getOperand(1);
3713
3714 // Shift amount needs to be uimm5.
3715 if (N0.getMachineOpcode() == RISCV::SLLI &&
3716 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3717 break;
3718
3719 SDNode *Result =
3720 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3721 N00, N01);
3722 ReplaceUses(N, Result);
3723 return true;
3724 }
3725 case RISCV::ADDW:
3726 case RISCV::ADDIW:
3727 case RISCV::SUBW:
3728 case RISCV::MULW:
3729 case RISCV::SLLIW:
3730 case RISCV::PACKW:
3731 case RISCV::TH_MULAW:
3732 case RISCV::TH_MULAH:
3733 case RISCV::TH_MULSW:
3734 case RISCV::TH_MULSH:
3735 if (N0.getValueType() == MVT::i32)
3736 break;
3737
3738    // Result is already sign extended; just remove the sext.w.
3739 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3740 ReplaceUses(N, N0.getNode());
3741 return true;
3742 }
3743
3744 return false;
3745}
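// Illustrative example of this peephole:
//   add   a0, a1, a2
//   addiw a0, a0, 0        (the sext.w)
// is rewritten to the single instruction
//   addw  a0, a1, a2
// and when the input is already a W instruction (e.g. ADDW), the ADDIW is
// simply removed.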
3746
3747// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3748// that's glued to the pseudo. This tries to look up the value that was copied
3749// to V0.
3750static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3751 // Check that we're using V0 as a mask register.
3752 if (!isa<RegisterSDNode>(MaskOp) ||
3753 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3754 return SDValue();
3755
3756 // The glued user defines V0.
3757 const auto *Glued = GlueOp.getNode();
3758
3759 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3760 return SDValue();
3761
3762 // Check that we're defining V0 as a mask register.
3763 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3764 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3765 return SDValue();
3766
3767 SDValue MaskSetter = Glued->getOperand(2);
3768
3769 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3770 // from an extract_subvector or insert_subvector.
3771 if (MaskSetter->isMachineOpcode() &&
3772 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3773 MaskSetter = MaskSetter->getOperand(0);
3774
3775 return MaskSetter;
3776}
3777
3778static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3779 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3780 SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3781 if (!MaskSetter)
3782 return false;
3783
3784 const auto IsVMSet = [](unsigned Opc) {
3785 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3786 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3787 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3788 Opc == RISCV::PseudoVMSET_M_B8;
3789 };
3790
3791 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3792 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3793 // assume that it's all-ones? Same applies to its VL.
3794 return MaskSetter->isMachineOpcode() &&
3795 IsVMSet(MaskSetter.getMachineOpcode());
3796}
3797
3798// Return true if we can make sure mask of N is all-ones mask.
3799static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3800 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3801 N->getOperand(N->getNumOperands() - 1));
3802}
3803
3804static bool isImplicitDef(SDValue V) {
3805 if (!V.isMachineOpcode())
3806 return false;
3807 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3808 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3809 if (!isImplicitDef(V.getOperand(I)))
3810 return false;
3811 return true;
3812 }
3813 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3814}
3815
3816static bool hasGPROut(unsigned Opc) {
3817 switch (RISCV::getRVVMCOpcode(Opc)) {
3818 case RISCV::VCPOP_M:
3819 case RISCV::VFIRST_M:
3820 return true;
3821 }
3822 return false;
3823}
3824
3825// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3826// corresponding "unmasked" pseudo versions. The mask we're interested in will
3827// take the form of a V0 physical register operand, with a glued
3828// register-setting instruction.
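// Illustrative sketch (operand lists abbreviated): when V0 is known to be set
// by a PseudoVMSET_M_* glued to the node,
//   %v = PseudoVADD_VV_M1_MASK ..., $v0, ...
// is replaced by the unmasked
//   %v = PseudoVADD_VV_M1 ...
// dropping the mask operand and, when the unmasked form has no passthru, the
// passthru operand as well.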
3829bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3830  const RISCV::RISCVMaskedPseudoInfo *I =
3831      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3832 if (!I)
3833 return false;
3834
3835 unsigned MaskOpIdx = I->MaskOpIdx;
3836 if (!usesAllOnesMask(N, MaskOpIdx))
3837 return false;
3838
3839 // There are two classes of pseudos in the table - compares and
3840 // everything else. See the comment on RISCVMaskedPseudo for details.
3841 const unsigned Opc = I->UnmaskedPseudo;
3842 const MCInstrDesc &MCID = TII->get(Opc);
3843 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3844#ifndef NDEBUG
3845 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3846  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3847             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3848         "Masked and unmasked pseudos are inconsistent");
3849 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3850 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3851#endif
3852
3853  SmallVector<SDValue, 8> Ops;
3854  // Skip the passthru operand at index 0 if !UseTUPseudo and no GPR out.
3855 bool ShouldSkip = !UseTUPseudo && !hasGPROut(Opc);
3856 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
3857 // Skip the mask, and the Glue.
3858 SDValue Op = N->getOperand(I);
3859 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3860 continue;
3861 Ops.push_back(Op);
3862 }
3863
3864 // Transitively apply any node glued to our new node.
3865 const auto *Glued = N->getGluedNode();
3866 if (auto *TGlued = Glued->getGluedNode())
3867 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3868
3869  MachineSDNode *Result =
3870      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3871
3872 if (!N->memoperands_empty())
3873 CurDAG->setNodeMemRefs(Result, N->memoperands());
3874
3875 Result->setFlags(N->getFlags());
3876 ReplaceUses(N, Result);
3877
3878 return true;
3879}
3880
3881static bool IsVMerge(SDNode *N) {
3882 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3883}
3884
3885// Try to fold away VMERGE_VVM instructions into their true operands:
3886//
3887// %true = PseudoVADD_VV ...
3888// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3889// ->
3890// %x = PseudoVADD_VV_MASK %false, ..., %mask
3891//
3892// We can only fold if vmerge's passthru operand, vmerge's false operand and
3893// %true's passthru operand (if it has one) are the same. This is because we
3894// have to consolidate them into one passthru operand in the result.
3895//
3896// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3897// mask is all ones.
3898//
3899// The resulting VL is the minimum of the two VLs.
3900//
3901// The resulting policy is the effective policy the vmerge would have had,
3902// i.e. whether or not its passthru operand was implicit-def.
3903bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3904 SDValue Passthru, False, True, VL, Mask, Glue;
3905 assert(IsVMerge(N));
3906 Passthru = N->getOperand(0);
3907 False = N->getOperand(1);
3908 True = N->getOperand(2);
3909 Mask = N->getOperand(3);
3910 VL = N->getOperand(4);
3911 // We always have a glue node for the mask at v0.
3912 Glue = N->getOperand(N->getNumOperands() - 1);
3913 assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3914 assert(Glue.getValueType() == MVT::Glue);
3915
3916 // If the EEW of True is different from vmerge's SEW, then we can't fold.
3917 if (True.getSimpleValueType() != N->getSimpleValueType(0))
3918 return false;
3919
3920 // We require that either passthru and false are the same, or that passthru
3921 // is undefined.
3922 if (Passthru != False && !isImplicitDef(Passthru))
3923 return false;
3924
3925 assert(True.getResNo() == 0 &&
3926 "Expect True is the first output of an instruction.");
3927
3928  // N needs to be the only user of True.
3929 if (!True.hasOneUse())
3930 return false;
3931
3932 if (!True.isMachineOpcode())
3933 return false;
3934
3935 unsigned TrueOpc = True.getMachineOpcode();
3936 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3937 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3938 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3939
3940  const RISCV::RISCVMaskedPseudoInfo *Info =
3941      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3942 if (!Info)
3943 return false;
3944
3945 // If True has a passthru operand then it needs to be the same as vmerge's
3946 // False, since False will be used for the result's passthru operand.
3947 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3948 SDValue PassthruOpTrue = True->getOperand(0);
3949 if (False != PassthruOpTrue)
3950 return false;
3951 }
3952
3953 // Skip if True has side effect.
3954 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3955 return false;
3956
3957 // The last operand of a masked instruction may be glued.
3958 bool HasGlueOp = True->getGluedNode() != nullptr;
3959
3960 // The chain operand may exist either before the glued operands or in the last
3961 // position.
3962 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3963 bool HasChainOp =
3964 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3965
3966 if (HasChainOp) {
3967 // Avoid creating cycles in the DAG. We must ensure that none of the other
3968    // operands depend on True through its chain.
3969 SmallVector<const SDNode *, 4> LoopWorklist;
3970    SmallPtrSet<const SDNode *, 16> Visited;
3971    LoopWorklist.push_back(False.getNode());
3972 LoopWorklist.push_back(Mask.getNode());
3973 LoopWorklist.push_back(VL.getNode());
3974 LoopWorklist.push_back(Glue.getNode());
3975 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3976 return false;
3977 }
3978
3979 // The vector policy operand may be present for masked intrinsics
3980 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3981 unsigned TrueVLIndex =
3982 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3983 SDValue TrueVL = True.getOperand(TrueVLIndex);
3984 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3985
3986 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3987 if (LHS == RHS)
3988 return LHS;
3989 if (isAllOnesConstant(LHS))
3990 return RHS;
3991 if (isAllOnesConstant(RHS))
3992 return LHS;
3993 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3994 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3995 if (!CLHS || !CRHS)
3996 return SDValue();
3997 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3998 };
3999
4000 // Because N and True must have the same passthru operand (or True's operand
4001 // is implicit_def), the "effective" body is the minimum of their VLs.
4002 SDValue OrigVL = VL;
4003 VL = GetMinVL(TrueVL, VL);
4004 if (!VL)
4005 return false;
4006
4007 // Some operations produce different elementwise results depending on the
4008 // active elements, like viota.m or vredsum. This transformation is illegal
4009 // for these if we change the active elements (i.e. mask or VL).
4010 const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
4011 if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL))
4012 return false;
4013 if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) &&
4014 (Mask && !usesAllOnesMask(Mask, Glue)))
4015 return false;
4016
4017 // Make sure it doesn't raise any observable fp exceptions, since changing the
4018 // active elements will affect how fflags is set.
4019 if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
4020 return false;
4021
4022 SDLoc DL(N);
4023
4024 unsigned MaskedOpc = Info->MaskedPseudo;
4025#ifndef NDEBUG
4026 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
4027  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
4028         "Expected instructions with mask have policy operand.");
4029 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
4030 MCOI::TIED_TO) == 0 &&
4031 "Expected instructions with mask have a tied dest.");
4032#endif
4033
4034 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
4035 // operand is undefined.
4036 //
4037 // However, if the VL became smaller than what the vmerge had originally, then
4038 // elements past VL that were previously in the vmerge's body will have moved
4039 // to the tail. In that case we always need to use tail undisturbed to
4040 // preserve them.
4041 bool MergeVLShrunk = VL != OrigVL;
4042 uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
4043                        ? RISCVII::TAIL_AGNOSTIC
4044                        : /*TUMU*/ 0;
4045 SDValue PolicyOp =
4046 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
4047
4048
4049  SmallVector<SDValue, 8> Ops;
4050  Ops.push_back(False);
4051
4052 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
4053 const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
4054 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
4055
4056 Ops.push_back(Mask);
4057
4058 // For unmasked "VOp" with rounding mode operand, that is interfaces like
4059 // (..., rm, vl) or (..., rm, vl, policy).
4060 // Its masked version is (..., vm, rm, vl, policy).
4061 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
4062 if (HasRoundingMode)
4063 Ops.push_back(True->getOperand(TrueVLIndex - 1));
4064
4065 Ops.append({VL, SEW, PolicyOp});
4066
4067 // Result node should have chain operand of True.
4068 if (HasChainOp)
4069 Ops.push_back(True.getOperand(TrueChainOpIdx));
4070
4071 // Add the glue for the CopyToReg of mask->v0.
4072 Ops.push_back(Glue);
4073
4074  MachineSDNode *Result =
4075      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
4076 Result->setFlags(True->getFlags());
4077
4078 if (!cast<MachineSDNode>(True)->memoperands_empty())
4079 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
4080
4081 // Replace vmerge.vvm node by Result.
4082 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
4083
4084  // Replace the other results of True, e.g. the chain and VL.
4085 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
4086 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
4087
4088 return true;
4089}
4090
4091bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
4092  bool MadeChange = false;
4093  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4094
4095 while (Position != CurDAG->allnodes_begin()) {
4096 SDNode *N = &*--Position;
4097 if (N->use_empty() || !N->isMachineOpcode())
4098 continue;
4099
4100 if (IsVMerge(N))
4101 MadeChange |= performCombineVMergeAndVOps(N);
4102 }
4103 return MadeChange;
4104}
4105
4106/// If our passthru is an implicit_def, use noreg instead. This sidesteps
4107/// issues with MachineCSE not being able to CSE expressions with
4108/// IMPLICIT_DEF operands while preserving the semantic intent. See
4109/// pr64282 for context. Note that this transform is the last one
4110/// performed at ISEL DAG to DAG.
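/// A minimal sketch of the effect: a pseudo whose passthru (operand 0) is an
/// IMPLICIT_DEF, e.g.
///   %v = PseudoVADD_VV_M1 %undef_passthru, ...
/// is re-emitted with $noreg as operand 0, so two otherwise identical nodes
/// whose IMPLICIT_DEFs are distinct no longer differ and MachineCSE can merge
/// them.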
4111bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4112  bool MadeChange = false;
4113  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4114
4115 while (Position != CurDAG->allnodes_begin()) {
4116 SDNode *N = &*--Position;
4117 if (N->use_empty() || !N->isMachineOpcode())
4118 continue;
4119
4120 const unsigned Opc = N->getMachineOpcode();
4121 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4122        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4123        !isImplicitDef(N->getOperand(0)))
4124 continue;
4125
4126    SmallVector<SDValue, 8> Ops;
4127    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4128 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4129 SDValue Op = N->getOperand(I);
4130 Ops.push_back(Op);
4131 }
4132
4133    MachineSDNode *Result =
4134        CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4135 Result->setFlags(N->getFlags());
4136 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4137 ReplaceUses(N, Result);
4138 MadeChange = true;
4139 }
4140 return MadeChange;
4141}
4142
4143
4144// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4145// for instruction scheduling.
4146FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4147                                       CodeGenOptLevel OptLevel) {
4148 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4149}
4150
4151char RISCVDAGToDAGISelLegacy::ID = 0;
4152
4153RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4154                                                 CodeGenOptLevel OptLevel)
4155    : SelectionDAGISelLegacy(
4156          ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4157