LLVM 23.0.0git
RISCVISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34
// NOTE(review): the line that opens this declaration is not present in this
// listing. What follows are the arguments of a hidden command-line flag named
// "riscv-use-rematerializable-movimm" whose initial value is false.
36 "riscv-use-rematerializable-movimm", cl::Hidden,
37 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
38 "constant materialization"),
39 cl::init(false));
40
41#define GET_DAGISEL_BODY RISCVDAGToDAGISel
42#include "RISCVGenDAGISel.inc"
43
// DAG preprocessing walk. Visits every node from the end of the node list
// towards the beginning and replaces a few node kinds (SPLAT_VECTOR,
// SPLAT_VECTOR_SPLIT_I64_VL, vector FP_EXTEND) with RISC-V specific nodes.
// If anything was replaced, dead nodes are removed at the end.
// NOTE(review): the declaration line of this definition and the lines
// numbered 92 and 116 in the embedded listing numbering are absent here.
45 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
46
47 bool MadeChange = false;
48 while (Position != CurDAG->allnodes_begin()) {
49 SDNode *N = &*--Position;
// Nodes without any use are not worth rewriting.
50 if (N->use_empty())
51 continue;
52
// Stays null unless one of the cases below builds a replacement.
53 SDValue Result;
54 switch (N->getOpcode()) {
55 case ISD::SPLAT_VECTOR: {
// With the P extension the splat is left as it is.
56 if (Subtarget->hasStdExtP())
57 break;
58 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
59 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
60 MVT VT = N->getSimpleValueType(0);
61 unsigned Opc =
62 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
63 SDLoc DL(N);
// The X0 register is passed as the VL operand.
64 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
65 SDValue Src = N->getOperand(0);
// Integer scalars are any-extended to XLenVT first.
66 if (VT.isInteger())
67 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
68 N->getOperand(0));
// The passthru operand is undef.
69 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
70 break;
71 }
72 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
73 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
74 // load. Done after lowering and combining so that we have a chance to
75 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
76 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
77 MVT VT = N->getSimpleValueType(0);
// Operand layout: passthru, low half, high half, VL.
78 SDValue Passthru = N->getOperand(0);
79 SDValue Lo = N->getOperand(1);
80 SDValue Hi = N->getOperand(2);
81 SDValue VL = N->getOperand(3);
82 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
83 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
84 "Unexpected VTs!");
85 MachineFunction &MF = CurDAG->getMachineFunction();
86 SDLoc DL(N);
87
88 // Create temporary stack for each expanding node.
89 SDValue StackSlot =
90 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
91 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
// NOTE(review): the definition of MPI (used below) is on a line that is not
// present in this listing.
93
// Both stores hang off the entry node and are joined by a TokenFactor below.
94 SDValue Chain = CurDAG->getEntryNode();
95 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
96
97 SDValue OffsetSlot =
98 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
// NOTE(review): this store targets byte offset 4 of the slot yet is tagged
// Align(8); confirm that this alignment is intended.
99 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
100 Align(8));
101
102 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
103
104 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
105 SDValue IntID =
106 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
// Operands of the riscv_vlse intrinsic node: chain, intrinsic ID, passthru,
// base address, the X0 register in the stride position, and VL.
107 SDValue Ops[] = {Chain,
108 IntID,
109 Passthru,
110 StackSlot,
111 CurDAG->getRegister(RISCV::X0, MVT::i64),
112 VL};
113
// NOTE(review): the final argument line of this call (numbered 116) is
// missing from this listing.
114 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
115 MVT::i64, MPI, Align(8),
117 break;
118 }
119 case ISD::FP_EXTEND: {
120 // We only have vector patterns for riscv_fpextend_vl in isel.
121 SDLoc DL(N);
122 MVT VT = N->getSimpleValueType(0);
123 if (!VT.isVector())
124 break;
// The X0 register serves as the VL operand; the mask is produced by VMSET_VL.
125 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
126 SDValue TrueMask = CurDAG->getNode(
127 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
128 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
129 TrueMask, VLMAX);
130 break;
131 }
132 }
133
// A non-null Result means one of the cases above built a replacement.
134 if (Result) {
135 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
136 LLVM_DEBUG(N->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\nNew: ");
138 LLVM_DEBUG(Result->dump(CurDAG));
139 LLVM_DEBUG(dbgs() << "\n");
140
// Only result 0 of the old node is redirected to the replacement.
141 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
142 MadeChange = true;
143 }
144 }
145
146 if (MadeChange)
147 CurDAG->RemoveDeadNodes();
148}
149
// Peephole walk over the DAG that only looks at machine-opcode nodes. Runs
// doPeepholeSExtW and doPeepholeMaskedRVV on each such node, then
// doPeepholeNoRegPassThru once, and removes dead nodes if anything changed.
// NOTE(review): the declaration line of this definition is absent from this
// listing.
// The current root is held in a HandleSDNode during the walk and written
// back with setRoot() afterwards.
151 HandleSDNode Dummy(CurDAG->getRoot());
152 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
153
154 bool MadeChange = false;
// Iterate from the end of the node list towards the beginning.
155 while (Position != CurDAG->allnodes_begin()) {
156 SDNode *N = &*--Position;
157 // Skip dead nodes and any non-machine opcodes.
158 if (N->use_empty() || !N->isMachineOpcode())
159 continue;
160
161 MadeChange |= doPeepholeSExtW(N);
162
163 // FIXME: This is here only because the VMerge transform doesn't
164 // know how to handle masked true inputs. Once that has been moved
165 // to post-ISEL, this can be deleted as well.
166 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
167 }
168
169 CurDAG->setRoot(Dummy.getValue());
170
171 // After we're done with everything else, convert IMPLICIT_DEF
172 // passthru operands to NoRegister. This is required to workaround
173 // an optimization deficiency in MachineCSE. This really should
174 // be merged back into each of the patterns (i.e. there's no good
175 // reason not to go directly to NoReg), but is being done this way
176 // to allow easy backporting.
177 MadeChange |= doPeepholeNoRegPassThru();
178
179 if (MadeChange)
180 CurDAG->RemoveDeadNodes();
181}
182
// Emits one machine node per entry of an instruction sequence and threads
// each node's result into the next one as its source operand. The source of
// the first instruction is the X0 register. Returns the value produced by
// the last instruction, or the X0 register value when the loop body never
// runs.
// NOTE(review): the second line of the signature (numbered 184) and the case
// labels numbered 193, 197 and 200 are missing from this listing, so the
// operand kinds handled by the last three branches cannot be named from here.
183static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
185 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
186 for (const RISCVMatInt::Inst &Inst : Seq) {
187 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
188 SDNode *Result = nullptr;
189 switch (Inst.getOpndKind()) {
190 case RISCVMatInt::Imm:
// Immediate-only form: no register source.
191 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
192 break;
// Form taking the running value and the X0 register.
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
195 CurDAG->getRegister(RISCV::X0, VT));
196 break;
// Form taking the running value as both operands.
198 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
199 break;
// Form taking the running value and the immediate.
201 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
202 break;
203 }
204
205 // Only the first instruction has X0 as its source.
206 SrcReg = SDValue(Result, 0);
207 }
208
209 return SrcReg;
210}
211
// Materializes the constant Imm of type VT and returns the resulting value.
// Three strategies are tried in order: a single PseudoMovImm node (two
// instruction sequences, only when UsePseudoMovImm is set), a two-register
// form (ADD-like op of X and X shifted left), and finally the plain
// instruction sequence via selectImmSeq.
// NOTE(review): the lines that define Seq (numbered 214) and SeqLo (numbered
// 230) are missing from this listing.
212static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
213 int64_t Imm, const RISCVSubtarget &Subtarget) {
215
216 // Use a rematerializable pseudo instruction for short sequences if enabled.
217 if (Seq.size() == 2 && UsePseudoMovImm)
218 return SDValue(
219 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
220 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
221 0);
222
223 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
224 // worst an LUI+ADDIW. This will require an extra register, but avoids a
225 // constant pool.
226 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
227 // low and high 32 bits are the same and bit 31 and 63 are set.
228 if (Seq.size() > 3) {
229 unsigned ShiftAmt, AddOpc;
231 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
// Only use the two-register form when it is strictly shorter: SeqLo plus the
// extra SLLI and add must beat the original sequence length.
232 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
233 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
234
235 SDValue SLLI = SDValue(
236 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
237 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
238 0);
239 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
240 }
241 }
242
243 // Otherwise, use the original sequence.
244 return selectImmSeq(CurDAG, DL, VT, Seq);
245}
246
// Appends the common trailing operands of a vector load/store pseudo to
// Operands, reading them from Node starting at operand index CurOp. The
// order is: base pointer, optional stride/index, optional mask, VL, a
// log2(SEW) constant, a policy constant (loads only) and finally the chain
// (operand 0 of Node). When IndexVT is non-null and an index is present, the
// index operand's value type is written to *IndexVT.
// NOTE(review): the first line of the signature and the line numbered 277
// (where Policy is presumably declared) are missing from this listing.
248 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
249 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
250 bool IsLoad, MVT *IndexVT) {
251 SDValue Chain = Node->getOperand(0);
252
253 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
254
255 if (IsStridedOrIndexed) {
256 Operands.push_back(Node->getOperand(CurOp++)); // Index.
257 if (IndexVT)
258 *IndexVT = Operands.back()->getSimpleValueType(0);
259 }
260
261 if (IsMasked) {
262 SDValue Mask = Node->getOperand(CurOp++);
263 Operands.push_back(Mask);
264 }
// The VL operand goes through selectVLOp before being appended.
265 SDValue VL;
266 selectVLOp(Node->getOperand(CurOp++), VL);
267 Operands.push_back(VL);
268
269 MVT XLenVT = Subtarget->getXLenVT();
270 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
271 Operands.push_back(SEWOp);
272
273 // At the IR layer, all the masked load intrinsics have policy operands,
274 // none of the others do. All have passthru operands. For our pseudos,
275 // all loads have policy operands.
276 if (IsLoad) {
// Masked loads take the policy from the next operand of Node.
278 if (IsMasked)
279 Policy = Node->getConstantOperandVal(CurOp++);
280 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
281 Operands.push_back(PolicyOp);
282 }
283
284 Operands.push_back(Chain); // Chain.
285}
286
// Selects a (possibly masked, possibly strided) segment load intrinsic node
// with NF fields into the pseudo returned by RISCV::getVLSEGPseudo, then
// replaces both results of Node (value and chain) with those of the new
// machine node and removes Node.
// NOTE(review): the lines numbered 292 and 295, which presumably declare LMUL
// and Operands, are missing from this listing.
287void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
288 bool IsStrided) {
289 SDLoc DL(Node);
290 MVT VT = Node->getSimpleValueType(0);
// Log2SEW is carried as the last operand of Node.
291 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
293
// Operand reading starts at index 2.
294 unsigned CurOp = 2;
296
// The first operand forwarded is appended before the common load/store
// operands.
297 Operands.push_back(Node->getOperand(CurOp++));
298
299 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
300 Operands, /*IsLoad=*/true);
301
302 const RISCV::VLSEGPseudo *P =
303 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
304 static_cast<unsigned>(LMUL));
305 MachineSDNode *Load =
306 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
307
// Carry the memory operand of the original node over to the new one.
308 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
309
310 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
311 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
312 CurDAG->RemoveDeadNode(Node);
313}
314
// Selects a segment load into the pseudo returned by RISCV::getVLSEGPseudo
// with the FF flag set and Strided cleared. The new machine node has three
// results (value, VL, chain), each of which replaces the matching result of
// Node before Node is removed.
// NOTE(review): the first line of the signature and the lines numbered 321
// and 324 (presumably declaring LMUL and Operands) are missing from this
// listing.
316 bool IsMasked) {
317 SDLoc DL(Node);
318 MVT VT = Node->getSimpleValueType(0);
319 MVT XLenVT = Subtarget->getXLenVT();
// Log2SEW is carried as the last operand of Node.
320 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
322
// Operand reading starts at index 2.
323 unsigned CurOp = 2;
325
326 Operands.push_back(Node->getOperand(CurOp++));
327
328 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
329 /*IsStridedOrIndexed*/ false, Operands,
330 /*IsLoad=*/true);
331
332 const RISCV::VLSEGPseudo *P =
333 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
334 Log2SEW, static_cast<unsigned>(LMUL));
// Unlike the non-FF form, an extra XLenVT result is produced.
335 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
336 XLenVT, MVT::Other, Operands);
337
338 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
339
340 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
341 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
342 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
343 CurDAG->RemoveDeadNode(Node);
344}
345
// Selects an indexed segment load with NF fields into the pseudo returned by
// RISCV::getVLXSEGPseudo, replaces both results of Node with those of the new
// machine node and removes Node. Reports a fatal usage error when the index
// element width is 64 bits on a non-64-bit subtarget.
// NOTE(review): the lines numbered 351, 354 and 375 (presumably declaring
// LMUL, Operands and IndexLMUL) are missing from this listing.
346void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
347 bool IsOrdered) {
348 SDLoc DL(Node);
349 MVT VT = Node->getSimpleValueType(0);
// Log2SEW is carried as the last operand of Node.
350 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
352
353 unsigned CurOp = 2;
355
356 Operands.push_back(Node->getOperand(CurOp++));
357
// IndexVT receives the value type of the index operand.
358 MVT IndexVT;
359 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
360 /*IsStridedOrIndexed*/ true, Operands,
361 /*IsLoad=*/true, &IndexVT);
362
// Debug-only check that the index vector has the expected element count.
363#ifndef NDEBUG
364 // Number of element = RVVBitsPerBlock * LMUL / SEW
365 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
366 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
// decodeVLMUL yields a pair; a set second member selects division.
367 if (DecodedLMUL.second)
368 ContainedTyNumElts /= DecodedLMUL.first;
369 else
370 ContainedTyNumElts *= DecodedLMUL.first;
371 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
372 "Element count mismatch");
373#endif
374
376 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
377 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
378 reportFatalUsageError("The V extension does not support EEW=64 for index "
379 "values when XLEN=32");
380 }
381 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
382 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
383 static_cast<unsigned>(IndexLMUL));
384 MachineSDNode *Load =
385 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
386
387 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
388
389 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
390 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
391 CurDAG->RemoveDeadNode(Node);
392}
393
// Selects a (possibly masked, possibly strided) segment store with NF fields
// into the pseudo returned by RISCV::getVSSEGPseudo and replaces Node with
// the new machine node.
// NOTE(review): the lines numbered 399 and 402 (presumably declaring LMUL and
// Operands) are missing from this listing.
394void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
395 bool IsStrided) {
396 SDLoc DL(Node);
// For stores the vector type is taken from operand 2, not from a result.
397 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
// Log2SEW is carried as the last operand of Node.
398 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
400
401 unsigned CurOp = 2;
403
404 Operands.push_back(Node->getOperand(CurOp++));
405
// IsLoad is left at its default here, so no policy operand is appended for
// the store.
406 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
407 Operands);
408
409 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
410 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
411 MachineSDNode *Store =
412 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
413
// Carry the memory operand of the original node over to the new one.
414 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
415
416 ReplaceNode(Node, Store);
417}
418
// Selects an indexed segment store with NF fields into the pseudo returned by
// RISCV::getVSXSEGPseudo and replaces Node with the new machine node. Reports
// a fatal usage error when the index element width is 64 bits on a
// non-64-bit subtarget.
// NOTE(review): the lines numbered 424, 427 and 448 (presumably declaring
// LMUL, Operands and IndexLMUL) are missing from this listing.
419void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
420 bool IsOrdered) {
421 SDLoc DL(Node);
// For stores the vector type is taken from operand 2, not from a result.
422 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
423 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
425
426 unsigned CurOp = 2;
428
429 Operands.push_back(Node->getOperand(CurOp++));
430
// IndexVT receives the value type of the index operand.
431 MVT IndexVT;
432 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
433 /*IsStridedOrIndexed*/ true, Operands,
434 /*IsLoad=*/false, &IndexVT);
435
// Debug-only check that the index vector has the expected element count.
436#ifndef NDEBUG
437 // Number of element = RVVBitsPerBlock * LMUL / SEW
438 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
439 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
440 if (DecodedLMUL.second)
441 ContainedTyNumElts /= DecodedLMUL.first;
442 else
443 ContainedTyNumElts *= DecodedLMUL.first;
444 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
445 "Element count mismatch");
446#endif
447
449 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
450 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
451 reportFatalUsageError("The V extension does not support EEW=64 for index "
452 "values when XLEN=32");
453 }
454 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
455 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
456 static_cast<unsigned>(IndexLMUL));
457 MachineSDNode *Store =
458 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
459
460 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
461
462 ReplaceNode(Node, Store);
463}
464
// Selects the riscv_vsetvli / riscv_vsetvlimax intrinsics into one of
// PseudoVSETVLI, PseudoVSETVLIX0 or PseudoVSETIVLI. Does nothing when the
// subtarget has no vector instructions.
// NOTE(review): the declaration line of this definition and the line numbered
// 519 (the start of the final statement) are missing from this listing.
466 if (!Subtarget->hasVInstructions())
467 return;
468
469 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
470
471 SDLoc DL(Node);
472 MVT XLenVT = Subtarget->getXLenVT();
473
474 unsigned IntNo = Node->getConstantOperandVal(0);
475
476 assert((IntNo == Intrinsic::riscv_vsetvli ||
477 IntNo == Intrinsic::riscv_vsetvlimax) &&
478 "Unexpected vsetvli intrinsic");
479
// The SEW operand is at index 1 for vsetvlimax and at index 2 for vsetvli;
// the LMUL operand follows it.
480 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
481 unsigned Offset = (VLMax ? 1 : 2);
482
483 assert(Node->getNumOperands() == Offset + 2 &&
484 "Unexpected number of operands");
485
// Only the low three bits of each operand are used.
486 unsigned SEW =
487 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
488 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
489 Node->getConstantOperandVal(Offset + 1) & 0x7);
490
491 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
492 /*MaskAgnostic*/ true);
493 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
494
495 SDValue VLOperand;
496 unsigned Opcode = RISCV::PseudoVSETVLI;
// A constant operand 1 equal to VLEN divided by the SEW/LMUL ratio is
// treated like a VLMAX request when the real VLEN is known.
497 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
498 if (auto VLEN = Subtarget->getRealVLen())
499 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
500 VLMax = true;
501 }
// VLMAX, or an all-ones operand 1, uses the X0 form of the pseudo.
502 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
503 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
504 Opcode = RISCV::PseudoVSETVLIX0;
505 } else {
506 VLOperand = Node->getOperand(1);
507
// A constant AVL that fits in 5 unsigned bits uses the immediate form.
508 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
509 uint64_t AVL = C->getZExtValue();
510 if (isUInt<5>(AVL)) {
511 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
512 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
513 XLenVT, VLImm, VTypeIOp));
514 return;
515 }
516 }
517 }
518
520 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
521}
522
// Selects the riscv_sf_vsettnt / riscv_sf_vsettm / riscv_sf_vsettk intrinsics
// into PseudoSF_VSETTNT, PseudoSF_VSETTM or PseudoSF_VSETTK. Does nothing
// when the subtarget lacks the XSfmmbase vendor extension.
// NOTE(review): the declaration line of this definition and the line numbered
// 555 (the start of the statement in the else branch) are missing from this
// listing.
524 if (!Subtarget->hasVendorXSfmmbase())
525 return;
526
527 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
528
529 SDLoc DL(Node);
530 MVT XLenVT = Subtarget->getXLenVT();
531
532 unsigned IntNo = Node->getConstantOperandVal(0);
533
534 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
535 IntNo == Intrinsic::riscv_sf_vsettm ||
536 IntNo == Intrinsic::riscv_sf_vsettk) &&
537 "Unexpected XSfmm vset intrinsic");
538
// Operand 2 encodes SEW and operand 3 encodes the widening factor.
539 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
540 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
541 unsigned PseudoOpCode =
542 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
543 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
544 : RISCV::PseudoSF_VSETTK;
545
// vsettnt takes a single encoded vtype immediate; the other two take
// log2(SEW) and the widening factor as separate immediates.
546 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
547 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
548 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
549
550 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
551 Node->getOperand(1), VTypeIOp));
552 } else {
553 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
554 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
556 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
557 Node->getOperand(1), Log2SEW, TWiden));
558 }
559}
560
// Tries to rewrite (x << C1) op C2, for op in {AND, OR, XOR}, as
// (x op (C2 >> C1)) << C1 so that the logic immediate fits ANDI/ORI/XORI.
// Returns true and replaces Node on success, false otherwise.
// NOTE(review): the declaration line of this definition and the lines
// numbered 573 and 598 (where Cst and ShlCst are defined) are missing from
// this listing.
562 MVT VT = Node->getSimpleValueType(0);
563 unsigned Opcode = Node->getOpcode();
564 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
565 "Unexpected opcode");
566 SDLoc DL(Node);
567
568 // For operations of the form (x << C1) op C2, check if we can use
569 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
570 SDValue N0 = Node->getOperand(0);
571 SDValue N1 = Node->getOperand(1);
572
574 if (!Cst)
575 return false;
576
577 int64_t Val = Cst->getSExtValue();
578
579 // Check if immediate can already use ANDI/ORI/XORI.
580 if (isInt<12>(Val))
581 return false;
582
583 SDValue Shift = N0;
584
585 // If Val is simm32 and we have a sext_inreg from i32, then the binop
586 // produces at least 33 sign bits. We can peek through the sext_inreg and use
587 // a SLLIW at the end.
588 bool SignExt = false;
589 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
590 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
591 SignExt = true;
592 Shift = N0.getOperand(0);
593 }
594
// The shift must be a single-use SHL, since it is replaced by the new nodes.
595 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
596 return false;
597
599 if (!ShlCst)
600 return false;
601
602 uint64_t ShAmt = ShlCst->getZExtValue();
603
604 // Make sure that we don't change the operation by removing bits.
605 // This only matters for OR and XOR, AND is unaffected.
606 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
607 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
608 return false;
609
// Val is signed, so this right shift is applied to a signed 64-bit value.
610 int64_t ShiftedVal = Val >> ShAmt;
611 if (!isInt<12>(ShiftedVal))
612 return false;
613
614 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
615 if (SignExt && ShAmt >= 32)
616 return false;
617
618 // Ok, we can reorder to get a smaller immediate.
619 unsigned BinOpc;
620 switch (Opcode) {
621 default: llvm_unreachable("Unexpected opcode");
622 case ISD::AND: BinOpc = RISCV::ANDI; break;
623 case ISD::OR: BinOpc = RISCV::ORI; break;
624 case ISD::XOR: BinOpc = RISCV::XORI; break;
625 }
626
627 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
628
// Emit the logic op on the unshifted value first, then the shift.
629 SDNode *BinOp = CurDAG->getMachineNode(
630 BinOpc, DL, VT, Shift.getOperand(0),
631 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
632 SDNode *SLLI =
633 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
634 CurDAG->getTargetConstant(ShAmt, DL, VT));
635 ReplaceNode(Node, SLLI);
636 return true;
637}
638
// Tries to select a right shift by a constant whose single-use first operand
// is either a SHL by a constant or a SIGN_EXTEND_INREG into one signed
// bitfield-extract instruction (TH_EXT, NDS_BFOS or QC_EXT, depending on the
// available vendor extension). Returns true and replaces Node on success.
// NOTE(review): the declaration line of this definition is absent from this
// listing.
640 unsigned Opc;
641
// The first matching vendor extension wins.
642 if (Subtarget->hasVendorXTHeadBb())
643 Opc = RISCV::TH_EXT;
644 else if (Subtarget->hasVendorXAndesPerf())
645 Opc = RISCV::NDS_BFOS;
646 else if (Subtarget->hasVendorXqcibm())
647 Opc = RISCV::QC_EXT;
648 else
649 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
650 return false;
651
652 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
653 if (!N1C)
654 return false;
655
656 SDValue N0 = Node->getOperand(0);
657 if (!N0.hasOneUse())
658 return false;
659
// Builds the extract node from the first operand of N0. The lambda's N0
// parameter shadows the outer N0.
660 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
661 const SDLoc &DL, MVT VT) {
662 if (Opc == RISCV::QC_EXT) {
663 // QC.EXT X, width, shamt
664 // shamt is the same as Lsb
665 // width is the number of bits to extract from the Lsb
666 Msb = Msb - Lsb + 1;
667 }
668 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
669 CurDAG->getTargetConstant(Msb, DL, VT),
670 CurDAG->getTargetConstant(Lsb, DL, VT));
671 };
672
673 SDLoc DL(Node);
674 MVT VT = Node->getSimpleValueType(0);
675 const unsigned RightShAmt = N1C->getZExtValue();
676
677 // Transform (sra (shl X, C1) C2) with C1 < C2
678 // -> (SignedBitfieldExtract X, msb, lsb)
679 if (N0.getOpcode() == ISD::SHL) {
680 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
681 if (!N01C)
682 return false;
683
684 const unsigned LeftShAmt = N01C->getZExtValue();
685 // Make sure that this is a bitfield extraction (i.e., the shift-right
686 // amount can not be less than the left-shift).
// NOTE(review): equal shift amounts pass this check, although the comment
// above the if says C1 < C2.
687 if (LeftShAmt > RightShAmt)
688 return false;
689
690 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
691 const unsigned Msb = MsbPlusOne - 1;
692 const unsigned Lsb = RightShAmt - LeftShAmt;
693
694 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
695 ReplaceNode(Node, Sbe);
696 return true;
697 }
698
699 // Transform (sra (sext_inreg X, _), C) ->
700 // (SignedBitfieldExtract X, msb, lsb)
701 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
702 unsigned ExtSize =
703 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
704
705 // ExtSize of 32 should use sraiw via tablegen pattern.
706 if (ExtSize == 32)
707 return false;
708
709 const unsigned Msb = ExtSize - 1;
710 // If the shift-right amount is greater than Msb, it means that extracts
711 // the X[Msb] bit and sign-extend it.
712 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
713
714 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
715 ReplaceNode(Node, Sbe);
716 return true;
717 }
718
719 return false;
720}
721
// Tries to select (sra (shl X, C1), C2) with C1 > C2 into a single NDS_BFOS
// node. Requires the XAndesPerf vendor extension, a constant shift amount
// and a single-use first operand. Returns true and replaces Node on success.
// NOTE(review): the declaration line of this definition is absent from this
// listing.
723 // Only supported with XAndesPerf at the moment.
724 if (!Subtarget->hasVendorXAndesPerf())
725 return false;
726
727 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
728 if (!N1C)
729 return false;
730
731 SDValue N0 = Node->getOperand(0);
732 if (!N0.hasOneUse())
733 return false;
734
// Builds the NDS_BFOS node from the first operand of N0. Note the operand
// order here is (Lsb, Msb). The lambda's N0 parameter shadows the outer N0.
735 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
736 const SDLoc &DL, MVT VT) {
737 unsigned Opc = RISCV::NDS_BFOS;
738 // If the Lsb is equal to the Msb, then the Lsb should be 0.
739 if (Lsb == Msb)
740 Lsb = 0;
741 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
742 CurDAG->getTargetConstant(Lsb, DL, VT),
743 CurDAG->getTargetConstant(Msb, DL, VT));
744 };
745
746 SDLoc DL(Node);
747 MVT VT = Node->getSimpleValueType(0);
748 const unsigned RightShAmt = N1C->getZExtValue();
749
750 // Transform (sra (shl X, C1) C2) with C1 > C2
751 // -> (NDS.BFOS X, lsb, msb)
752 if (N0.getOpcode() == ISD::SHL) {
753 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
754 if (!N01C)
755 return false;
756
757 const unsigned LeftShAmt = N01C->getZExtValue();
758 // Make sure that this is a bitfield insertion (i.e., the shift-right
759 // amount should be less than the left-shift).
760 if (LeftShAmt <= RightShAmt)
761 return false;
762
763 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
764 const unsigned Msb = MsbPlusOne - 1;
765 const unsigned Lsb = LeftShAmt - RightShAmt;
766
767 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
768 ReplaceNode(Node, Sbi);
769 return true;
770 }
771
772 return false;
773}
774
// Replaces Node with an unsigned bitfield-extract of X covering bits
// [Lsb, Msb], using TH_EXTU, NDS_BFOZ or QC_EXTU depending on the available
// vendor extension. Returns false without touching Node when none of those
// extensions is present, true otherwise.
// NOTE(review): the first line of the signature is missing from this
// listing.
776 const SDLoc &DL, MVT VT,
777 SDValue X, unsigned Msb,
778 unsigned Lsb) {
779 unsigned Opc;
780
781 if (Subtarget->hasVendorXTHeadBb()) {
782 Opc = RISCV::TH_EXTU;
783 } else if (Subtarget->hasVendorXAndesPerf()) {
784 Opc = RISCV::NDS_BFOZ;
785 } else if (Subtarget->hasVendorXqcibm()) {
786 Opc = RISCV::QC_EXTU;
787 // QC.EXTU X, width, shamt
788 // shamt is the same as Lsb
789 // width is the number of bits to extract from the Lsb
790 Msb = Msb - Lsb + 1;
791 } else {
792 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
793 return false;
794 }
795
// For QC_EXTU the first immediate now holds the width rather than the Msb.
796 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
797 CurDAG->getTargetConstant(Msb, DL, VT),
798 CurDAG->getTargetConstant(Lsb, DL, VT));
799 ReplaceNode(Node, Ube);
800 return true;
801}
802
// Replaces Node with an NDS_BFOZ node on X whose immediates are passed in
// (Lsb, Msb) order. Returns false without touching Node when the XAndesPerf
// vendor extension is unavailable, true otherwise.
// NOTE(review): the first line of the signature is missing from this
// listing.
804 const SDLoc &DL, MVT VT,
805 SDValue X, unsigned Msb,
806 unsigned Lsb) {
807 // Only supported with XAndesPerf at the moment.
808 if (!Subtarget->hasVendorXAndesPerf())
809 return false;
810
811 unsigned Opc = RISCV::NDS_BFOZ;
812
813 // If the Lsb is equal to the Msb, then the Lsb should be 0.
814 if (Lsb == Msb)
815 Lsb = 0;
816 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
817 CurDAG->getTargetConstant(Lsb, DL, VT),
818 CurDAG->getTargetConstant(Msb, DL, VT));
819 ReplaceNode(Node, Ubi);
820 return true;
821}
822
// Tries to select a pre/post-increment load into one of the XTHeadMemIdx
// TH_L*IB / TH_L*IA instructions. Requires the XTHeadMemIdx vendor extension
// and a constant offset expressible as a 5-bit signed value shifted left by
// 0..3. Returns true and replaces Node on success, false otherwise.
// NOTE(review): the declaration line of this definition and the lines
// numbered 828, 829 and 833 (where Ld, AM and C are defined) are missing from
// this listing.
824 // Target does not support indexed loads.
825 if (!Subtarget->hasVendorXTHeadMemIdx())
826 return false;
827
830 if (AM == ISD::UNINDEXED)
831 return false;
832
834 if (!C)
835 return false;
836
837 EVT LoadVT = Ld->getMemoryVT();
838 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
839 "Unexpected addressing mode");
840 bool IsPre = AM == ISD::PRE_INC;
841 bool IsPost = AM == ISD::POST_INC;
842 int64_t Offset = C->getSExtValue();
843
844 // The constants that can be encoded in the THeadMemIdx instructions
845 // are of the form (sign_extend(imm5) << imm2).
// Pick the smallest shift for which the offset is a multiple of 1 << Shift
// and the shifted-down value fits in 5 signed bits.
846 unsigned Shift;
847 for (Shift = 0; Shift < 4; Shift++)
848 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
849 break;
850
851 // Constant cannot be encoded.
852 if (Shift == 4)
853 return false;
854
// Opcode choice: memory width x pre/post increment x zero/sign extension.
// The 64-bit forms have no zero-extending variant.
855 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
856 unsigned Opcode;
857 if (LoadVT == MVT::i8 && IsPre)
858 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
859 else if (LoadVT == MVT::i8 && IsPost)
860 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
861 else if (LoadVT == MVT::i16 && IsPre)
862 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
863 else if (LoadVT == MVT::i16 && IsPost)
864 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
865 else if (LoadVT == MVT::i32 && IsPre)
866 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
867 else if (LoadVT == MVT::i32 && IsPost)
868 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
869 else if (LoadVT == MVT::i64 && IsPre)
870 Opcode = RISCV::TH_LDIB;
871 else if (LoadVT == MVT::i64 && IsPost)
872 Opcode = RISCV::TH_LDIA;
873 else
874 return false;
875
// Operands: base pointer, scaled-down offset, shift amount, chain.
876 EVT Ty = Ld->getOffset().getValueType();
877 SDValue Ops[] = {
878 Ld->getBasePtr(),
879 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
880 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
881 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
882 Ld->getValueType(1), MVT::Other, Ops);
883
// Carry the memory operand of the original node over to the new one.
884 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
885 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
886
887 ReplaceNode(Node, New);
888
889 return true;
890}
891
892static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
893 SDValue Lo, SDValue Hi) {
894 SDValue Ops[] = {
895 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
896 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
897 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
898
899 return SDValue(
900 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops), 0);
901}
902
903// Helper to extract Lo and Hi values from a GPR pair.
// Lo is read from the sub_gpr_even subregister and Hi from sub_gpr_odd, both
// as MVT::i32; they are returned as {Lo, Hi}.
// NOTE(review): the line carrying the function name and parameter list
// (numbered 905) is missing from this listing.
904static std::pair<SDValue, SDValue>
906 SDValue Lo =
907 CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, MVT::i32, Pair);
908 SDValue Hi =
909 CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, MVT::i32, Pair);
910 return {Lo, Hi};
911}
912
913// Try to match WMACC pattern: ADDD where one operand pair comes from a
914// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
// On a match, a WMACCU / WMACC / WMACCSU machine node is built with the other
// operand pair as accumulator and true is returned; otherwise returns false.
// NOTE(review): the declaration line of this definition and the lines
// numbered 975 and 976 are missing from this listing; DL is not declared in
// the visible body, so it presumably comes from the missing declaration.
916 assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");
917
// ADDD operands: (lo, hi) of the first addend, then (lo, hi) of the second.
918 SDValue Op0Lo = Node->getOperand(0);
919 SDValue Op0Hi = Node->getOperand(1);
920 SDValue Op1Lo = Node->getOperand(2);
921 SDValue Op1Hi = Node->getOperand(3);
922
// True when Lo and Hi are results 0 and 1 of the same supported multiply
// node and each of them has exactly one use.
923 auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
924 unsigned Opc = Lo.getOpcode();
925 if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
926 Opc != RISCVISD::WMULSU)
927 return false;
928 return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
929 Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
930 };
931
932 SDNode *MulNode = nullptr;
933 SDValue AddLo, AddHi;
934
935 // Check if first operand pair is a supported multiply with single use.
936 if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
937 MulNode = Op0Lo.getNode();
938 AddLo = Op1Lo;
939 AddHi = Op1Hi;
940 }
941 // ADDD is commutative. Check if second operand pair is a supported multiply
942 // with single use.
943 else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
944 MulNode = Op1Lo.getNode();
945 AddLo = Op0Lo;
946 AddHi = Op0Hi;
947 } else {
948 return false;
949 }
950
// Map the multiply kind to the matching multiply-accumulate opcode.
951 unsigned Opc;
952 switch (MulNode->getOpcode()) {
953 default:
954 llvm_unreachable("Unexpected multiply opcode");
955 case ISD::UMUL_LOHI:
956 Opc = RISCV::WMACCU;
957 break;
958 case ISD::SMUL_LOHI:
959 Opc = RISCV::WMACC;
960 break;
961 case RISCVISD::WMULSU:
962 Opc = RISCV::WMACCSU;
963 break;
964 }
965
// The accumulator is passed as a single register-pair value.
966 SDValue Acc = buildGPRPair(CurDAG, DL, MVT::Untyped, AddLo, AddHi);
967
968 // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
969 SDValue M0 = MulNode->getOperand(0);
970 SDValue M1 = MulNode->getOperand(1);
971 MachineSDNode *New =
972 CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Acc, M0, M1);
973
// Split the pair result back into its two halves.
974 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
977 CurDAG->RemoveDeadNode(Node);
978 return true;
979}
980
981static Register getTileReg(uint64_t TileNum) {
982 assert(TileNum <= 15 && "Invalid tile number");
983 return RISCV::T0 + TileNum;
984}
985
987 if (!Subtarget->hasVInstructions())
988 return;
989
990 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
991
992 SDLoc DL(Node);
993 unsigned IntNo = Node->getConstantOperandVal(1);
994
995 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
996 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
997 "Unexpected vsetvli intrinsic");
998
999 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
1000 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
1001 SDValue SEWOp =
1002 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
1003 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
1004 Node->getOperand(4), Node->getOperand(5),
1005 Node->getOperand(8), SEWOp,
1006 Node->getOperand(0)};
1007
1008 unsigned Opcode;
1009 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
1010 switch (LMulSDNode->getSExtValue()) {
1011 case 5:
1012 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
1013 : RISCV::PseudoSF_VC_I_SE_MF8;
1014 break;
1015 case 6:
1016 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
1017 : RISCV::PseudoSF_VC_I_SE_MF4;
1018 break;
1019 case 7:
1020 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
1021 : RISCV::PseudoSF_VC_I_SE_MF2;
1022 break;
1023 case 0:
1024 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
1025 : RISCV::PseudoSF_VC_I_SE_M1;
1026 break;
1027 case 1:
1028 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
1029 : RISCV::PseudoSF_VC_I_SE_M2;
1030 break;
1031 case 2:
1032 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
1033 : RISCV::PseudoSF_VC_I_SE_M4;
1034 break;
1035 case 3:
1036 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
1037 : RISCV::PseudoSF_VC_I_SE_M8;
1038 break;
1039 }
1040
1041 ReplaceNode(Node, CurDAG->getMachineNode(
1042 Opcode, DL, Node->getSimpleValueType(0), Operands));
1043}
1044
1045static unsigned getSegInstNF(unsigned Intrinsic) {
1046#define INST_NF_CASE(NAME, NF) \
1047 case Intrinsic::riscv_##NAME##NF: \
1048 return NF;
1049#define INST_NF_CASE_MASK(NAME, NF) \
1050 case Intrinsic::riscv_##NAME##NF##_mask: \
1051 return NF;
1052#define INST_NF_CASE_FF(NAME, NF) \
1053 case Intrinsic::riscv_##NAME##NF##ff: \
1054 return NF;
1055#define INST_NF_CASE_FF_MASK(NAME, NF) \
1056 case Intrinsic::riscv_##NAME##NF##ff_mask: \
1057 return NF;
1058#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
1059 MACRO_NAME(NAME, 2) \
1060 MACRO_NAME(NAME, 3) \
1061 MACRO_NAME(NAME, 4) \
1062 MACRO_NAME(NAME, 5) \
1063 MACRO_NAME(NAME, 6) \
1064 MACRO_NAME(NAME, 7) \
1065 MACRO_NAME(NAME, 8)
1066#define INST_ALL_NF_CASE(NAME) \
1067 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
1068 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
1069#define INST_ALL_NF_CASE_WITH_FF(NAME) \
1070 INST_ALL_NF_CASE(NAME) \
1071 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
1072 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
1073 switch (Intrinsic) {
1074 default:
1075 llvm_unreachable("Unexpected segment load/store intrinsic");
1077 INST_ALL_NF_CASE(vlsseg)
1078 INST_ALL_NF_CASE(vloxseg)
1079 INST_ALL_NF_CASE(vluxseg)
1080 INST_ALL_NF_CASE(vsseg)
1081 INST_ALL_NF_CASE(vssseg)
1082 INST_ALL_NF_CASE(vsoxseg)
1083 INST_ALL_NF_CASE(vsuxseg)
1084 }
1085}
1086
1087static bool isApplicableToPLIOrPLUI(int Val) {
1088 // Check if the immediate is packed i8 or i10
1089 int16_t Bit31To16 = Val >> 16;
1090 int16_t Bit15To0 = Val;
1091 int8_t Bit15To8 = Bit15To0 >> 8;
1092 int8_t Bit7To0 = Val;
1093 if (Bit31To16 != Bit15To0)
1094 return false;
1095
1096 return isInt<10>(Bit15To0) || isShiftedInt<10, 6>(Bit15To0) ||
1097 Bit15To8 == Bit7To0;
1098}
1099
1101 // If we have a custom node, we have already selected.
1102 if (Node->isMachineOpcode()) {
1103 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1104 Node->setNodeId(-1);
1105 return;
1106 }
1107
1108 // Instruction Selection not handled by the auto-generated tablegen selection
1109 // should be handled here.
1110 unsigned Opcode = Node->getOpcode();
1111 MVT XLenVT = Subtarget->getXLenVT();
1112 SDLoc DL(Node);
1113 MVT VT = Node->getSimpleValueType(0);
1114
1115 bool HasBitTest = Subtarget->hasBEXTILike();
1116
1117 switch (Opcode) {
1118 case ISD::Constant: {
1119 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1120 auto *ConstNode = cast<ConstantSDNode>(Node);
1121 if (ConstNode->isZero()) {
1122 SDValue New =
1123 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1124 ReplaceNode(Node, New.getNode());
1125 return;
1126 }
1127 int64_t Imm = ConstNode->getSExtValue();
1128 // If only the lower 8 bits are used, try to convert this to a simm6 by
1129 // sign-extending bit 7. This is neutral without the C extension, and
1130 // allows C.LI to be used if C is present.
1131 if (!isInt<8>(Imm) && isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) &&
1133 Imm = SignExtend64<8>(Imm);
1134 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1135 // by sign extending bit 15.
1136 else if (!isInt<16>(Imm) && isUInt<16>(Imm) &&
1138 Imm = SignExtend64<16>(Imm);
1139
1140 // If the upper XLen-16 bits are not used, the lower 2 bytes are the same,
1141 // and we can't use li, convert to an xlen splat so we can use pli.b.
1142 if (Subtarget->hasStdExtP() && !isInt<12>(Imm) &&
1143 (Imm & 0xff) == ((Imm >> 8) & 0xff) && hasAllHUsers(Node)) {
1144 // Splat the lower 16 bits to XLen. Sign extend for RV32.
1145 uint64_t Splat = Imm & 0xffff;
1146 Splat = (Splat << 16) | Splat;
1147 if (VT == MVT::i64)
1148 Imm = Splat << 32 | Splat;
1149 else
1150 Imm = SignExtend64<32>(Splat);
1151 } else {
1152 // If the upper 32-bits are not used try to convert this into a simm32 by
1153 // sign extending bit 32.
1154 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1155 Imm = SignExtend64<32>(Imm);
1156
1157 if (VT == MVT::i64 && !isInt<12>(Imm) && !isShiftedInt<20, 12>(Imm) &&
1158 Subtarget->hasStdExtP() && isApplicableToPLIOrPLUI(Imm) &&
1159 hasAllWUsers(Node)) {
1160 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers,
1161 // we can simply copy lower 32 bits to higher 32 bits to make it able to
1162 // rematerialize to PLI_B or PLI_H
1163 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1164 }
1165 }
1166
1167 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1168 return;
1169 }
1170 case ISD::ConstantFP: {
1171 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1172
1173 bool Is64Bit = Subtarget->is64Bit();
1174 bool HasZdinx = Subtarget->hasStdExtZdinx();
1175
1176 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1177 SDValue Imm;
1178 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1179 // create an integer immediate.
1180 if (APF.isPosZero() || NegZeroF64) {
1181 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1182 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1183 else
1184 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1185 } else {
1186 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1187 *Subtarget);
1188 }
1189
1190 unsigned Opc;
1191 switch (VT.SimpleTy) {
1192 default:
1193 llvm_unreachable("Unexpected size");
1194 case MVT::bf16:
1195 assert(Subtarget->hasStdExtZfbfmin());
1196 Opc = RISCV::FMV_H_X;
1197 break;
1198 case MVT::f16:
1199 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1200 break;
1201 case MVT::f32:
1202 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1203 break;
1204 case MVT::f64:
1205 // For RV32, we can't move from a GPR, we need to convert instead. This
1206 // should only happen for +0.0 and -0.0.
1207 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1208 if (HasZdinx)
1209 Opc = RISCV::COPY;
1210 else
1211 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1212 break;
1213 }
1214
1215 SDNode *Res;
1216 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1217 Res =
1218 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1219 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1220 Res =
1221 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1222 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1223 Res = CurDAG->getMachineNode(
1224 Opc, DL, VT, Imm,
1225 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1226 else
1227 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1228
1229 // For f64 -0.0, we need to insert a fneg.d idiom.
1230 if (NegZeroF64) {
1231 Opc = RISCV::FSGNJN_D;
1232 if (HasZdinx)
1233 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1234 Res =
1235 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1236 }
1237
1238 ReplaceNode(Node, Res);
1239 return;
1240 }
1241 case RISCVISD::BuildGPRPair:
1242 case RISCVISD::BuildPairF64:
1243 case RISCVISD::BuildPairGPRVec: {
1244 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1245 break;
1246
1247 assert((!Subtarget->is64Bit() || Opcode != RISCVISD::BuildPairF64) &&
1248 "BuildPairF64 only handled here on rv32i_zdinx");
1249
1250 SDValue N =
1251 buildGPRPair(CurDAG, DL, VT, Node->getOperand(0), Node->getOperand(1));
1252 ReplaceNode(Node, N.getNode());
1253 return;
1254 }
1255 case RISCVISD::SplitGPRPair:
1256 case RISCVISD::SplitF64:
1257 case RISCVISD::SplitGPRVec: {
1258 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1259 assert((!Subtarget->is64Bit() || Opcode != RISCVISD::SplitF64) &&
1260 "SplitF64 only handled here on rv32i_zdinx");
1261
1262 if (!SDValue(Node, 0).use_empty()) {
1263 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1264 Node->getValueType(0),
1265 Node->getOperand(0));
1266 ReplaceUses(SDValue(Node, 0), Lo);
1267 }
1268
1269 if (!SDValue(Node, 1).use_empty()) {
1270 SDValue Hi = CurDAG->getTargetExtractSubreg(
1271 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1272 ReplaceUses(SDValue(Node, 1), Hi);
1273 }
1274
1275 CurDAG->RemoveDeadNode(Node);
1276 return;
1277 }
1278
1279 if (!Subtarget->hasStdExtZfa())
1280 break;
1281 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1282 "Unexpected subtarget");
1283
1284 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1285 if (!SDValue(Node, 0).use_empty()) {
1286 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1287 Node->getOperand(0));
1288 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1289 }
1290 if (!SDValue(Node, 1).use_empty()) {
1291 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1292 Node->getOperand(0));
1293 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1294 }
1295
1296 CurDAG->RemoveDeadNode(Node);
1297 return;
1298 }
1299 case ISD::SHL: {
1300 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1301 if (!N1C)
1302 break;
1303 SDValue N0 = Node->getOperand(0);
1304 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1306 break;
1307 unsigned ShAmt = N1C->getZExtValue();
1308 uint64_t Mask = N0.getConstantOperandVal(1);
1309
1310 if (isShiftedMask_64(Mask)) {
1311 unsigned XLen = Subtarget->getXLen();
1312 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1313 unsigned TrailingZeros = llvm::countr_zero(Mask);
1314 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1315 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1316 // where C2 has 32 leading zeros and C3 trailing zeros.
1317 SDNode *SRLIW = CurDAG->getMachineNode(
1318 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1319 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1320 SDNode *SLLI = CurDAG->getMachineNode(
1321 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1322 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1323 ReplaceNode(Node, SLLI);
1324 return;
1325 }
1326 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1327 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1328 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1329 // where C2 has C4 leading zeros and no trailing zeros.
1330 // This is profitable if the "and" was to be lowered to
1331 // (srli (slli X, C4), C4) and not (andi X, C2).
1332 // For "LeadingZeros == 32":
1333 // - with Zba it's just (slli.uw X, C)
1334 // - without Zba a tablegen pattern applies the very same
1335 // transform as we would have done here
1336 SDNode *SLLI = CurDAG->getMachineNode(
1337 RISCV::SLLI, DL, VT, N0.getOperand(0),
1338 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1339 SDNode *SRLI = CurDAG->getMachineNode(
1340 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1341 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1342 ReplaceNode(Node, SRLI);
1343 return;
1344 }
1345 }
1346 break;
1347 }
1348 case ISD::SRL: {
1349 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1350 if (!N1C)
1351 break;
1352 SDValue N0 = Node->getOperand(0);
1353 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1354 break;
1355 unsigned ShAmt = N1C->getZExtValue();
1356 uint64_t Mask = N0.getConstantOperandVal(1);
1357
1358 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1359 // 32 leading zeros and C3 trailing zeros.
1360 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1361 unsigned XLen = Subtarget->getXLen();
1362 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1363 unsigned TrailingZeros = llvm::countr_zero(Mask);
1364 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1365 SDNode *SRLIW = CurDAG->getMachineNode(
1366 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1367 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1368 SDNode *SLLI = CurDAG->getMachineNode(
1369 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1370 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1371 ReplaceNode(Node, SLLI);
1372 return;
1373 }
1374 }
1375
1376 // Optimize (srl (and X, C2), C) ->
1377 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1378 // Where C2 is a mask with C3 trailing ones.
1379 // Taking into account that the C2 may have had lower bits unset by
1380 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1381 // This pattern occurs when type legalizing right shifts for types with
1382 // less than XLen bits.
1383 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1384 if (!isMask_64(Mask))
1385 break;
1386 unsigned TrailingOnes = llvm::countr_one(Mask);
1387 if (ShAmt >= TrailingOnes)
1388 break;
1389 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1390 if (TrailingOnes == 32) {
1391 SDNode *SRLI = CurDAG->getMachineNode(
1392 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1393 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1394 ReplaceNode(Node, SRLI);
1395 return;
1396 }
1397
1398 // Only do the remaining transforms if the AND has one use.
1399 if (!N0.hasOneUse())
1400 break;
1401
1402 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1403 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1404 SDNode *BEXTI = CurDAG->getMachineNode(
1405 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1406 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1407 ReplaceNode(Node, BEXTI);
1408 return;
1409 }
1410
1411 const unsigned Msb = TrailingOnes - 1;
1412 const unsigned Lsb = ShAmt;
1413 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1414 return;
1415
1416 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1417 SDNode *SLLI =
1418 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1419 CurDAG->getTargetConstant(LShAmt, DL, VT));
1420 SDNode *SRLI = CurDAG->getMachineNode(
1421 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1422 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1423 ReplaceNode(Node, SRLI);
1424 return;
1425 }
1426 case ISD::SRA: {
1428 return;
1429
1431 return;
1432
1433 // Optimize (sra (sext_inreg X, i16), C) ->
1434 // (srai (slli X, (XLen-16), (XLen-16) + C)
1435 // And (sra (sext_inreg X, i8), C) ->
1436 // (srai (slli X, (XLen-8), (XLen-8) + C)
1437 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1438 // This transform matches the code we get without Zbb. The shifts are more
1439 // compressible, and this can help expose CSE opportunities in the sdiv by
1440 // constant optimization.
1441 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1442 if (!N1C)
1443 break;
1444 SDValue N0 = Node->getOperand(0);
1445 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1446 break;
1447 unsigned ShAmt = N1C->getZExtValue();
1448 unsigned ExtSize =
1449 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1450 // ExtSize of 32 should use sraiw via tablegen pattern.
1451 if (ExtSize >= 32 || ShAmt >= ExtSize)
1452 break;
1453 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1454 SDNode *SLLI =
1455 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1456 CurDAG->getTargetConstant(LShAmt, DL, VT));
1457 SDNode *SRAI = CurDAG->getMachineNode(
1458 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1459 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1460 ReplaceNode(Node, SRAI);
1461 return;
1462 }
1464 // Optimize (sext_inreg (srl X, C), i8/i16) ->
1465 // (srai (slli X, XLen-ExtSize-C), XLen-ExtSize)
1466 // This is a bitfield extract pattern where we're extracting a signed
1467 // 8-bit or 16-bit field from position C.
1468 SDValue N0 = Node->getOperand(0);
1469 if (N0.getOpcode() != ISD::SRL || !N0.hasOneUse())
1470 break;
1471
1472 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1473 if (!ShAmtC)
1474 break;
1475
1476 unsigned ExtSize =
1477 cast<VTSDNode>(Node->getOperand(1))->getVT().getSizeInBits();
1478 unsigned ShAmt = ShAmtC->getZExtValue();
1479 unsigned XLen = Subtarget->getXLen();
1480
1481 // Only handle types less than 32, and make sure the shift amount is valid.
1482 if (ExtSize >= 32 || ShAmt >= XLen - ExtSize)
1483 break;
1484
1485 unsigned LShAmt = XLen - ExtSize - ShAmt;
1486 SDNode *SLLI =
1487 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1488 CurDAG->getTargetConstant(LShAmt, DL, VT));
1489 SDNode *SRAI = CurDAG->getMachineNode(
1490 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1491 CurDAG->getTargetConstant(XLen - ExtSize, DL, VT));
1492 ReplaceNode(Node, SRAI);
1493 return;
1494 }
1495 case ISD::OR: {
1497 return;
1498
1499 break;
1500 }
1501 case ISD::XOR:
1503 return;
1504
1505 break;
1506 case ISD::AND: {
1507 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1508 if (!N1C)
1509 break;
1510
1511 SDValue N0 = Node->getOperand(0);
1512
1513 bool LeftShift = N0.getOpcode() == ISD::SHL;
1514 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1515 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1516 if (!C)
1517 break;
1518 unsigned C2 = C->getZExtValue();
1519 unsigned XLen = Subtarget->getXLen();
1520 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1521
1522 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1523 // shift pair might offer more compression opportunities.
1524 // TODO: We could check for C extension here, but we don't have many lit
1525 // tests with the C extension enabled so not checking gets better
1526 // coverage.
1527 // TODO: What if ANDI faster than shift?
1528 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1529
1530 uint64_t C1 = N1C->getZExtValue();
1531
1532 // Clear irrelevant bits in the mask.
1533 if (LeftShift)
1535 else
1536 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1537
1538 // Some transforms should only be done if the shift has a single use or
1539 // the AND would become (srli (slli X, 32), 32)
1540 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1541
1542 SDValue X = N0.getOperand(0);
1543
1544 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1545 // with c3 leading zeros.
1546 if (!LeftShift && isMask_64(C1)) {
1547 unsigned Leading = XLen - llvm::bit_width(C1);
1548 if (C2 < Leading) {
1549 // If the number of leading zeros is C2+32 this can be SRLIW.
1550 if (C2 + 32 == Leading) {
1551 SDNode *SRLIW = CurDAG->getMachineNode(
1552 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1553 ReplaceNode(Node, SRLIW);
1554 return;
1555 }
1556
1557 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1558 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1559 //
1560 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1561 // legalized and goes through DAG combine.
1562 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1563 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1564 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1565 SDNode *SRAIW =
1566 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1567 CurDAG->getTargetConstant(31, DL, VT));
1568 SDNode *SRLIW = CurDAG->getMachineNode(
1569 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1570 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1571 ReplaceNode(Node, SRLIW);
1572 return;
1573 }
1574
1575 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1576 // available.
1577 // Transform (and (srl x, C2), C1)
1578 // -> (<bfextract> x, msb, lsb)
1579 //
1580 // Make sure to keep this below the SRLIW cases, as we always want to
1581 // prefer the more common instruction.
1582 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1583 const unsigned Lsb = C2;
1584 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1585 return;
1586
1587 // (srli (slli x, c3-c2), c3).
1588 // Skip if we could use (zext.w (sraiw X, C2)).
1589 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1590 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1591 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1592 // Also Skip if we can use bexti or th.tst.
1593 Skip |= HasBitTest && Leading == XLen - 1;
1594 if (OneUseOrZExtW && !Skip) {
1595 SDNode *SLLI = CurDAG->getMachineNode(
1596 RISCV::SLLI, DL, VT, X,
1597 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1598 SDNode *SRLI = CurDAG->getMachineNode(
1599 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1600 CurDAG->getTargetConstant(Leading, DL, VT));
1601 ReplaceNode(Node, SRLI);
1602 return;
1603 }
1604 }
1605 }
1606
1607 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1608 // shifted by c2 bits with c3 leading zeros.
1609 if (LeftShift && isShiftedMask_64(C1)) {
1610 unsigned Leading = XLen - llvm::bit_width(C1);
1611
1612 if (C2 + Leading < XLen &&
1613 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1614 // Use slli.uw when possible.
1615 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1616 SDNode *SLLI_UW =
1617 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1618 CurDAG->getTargetConstant(C2, DL, VT));
1619 ReplaceNode(Node, SLLI_UW);
1620 return;
1621 }
1622
1623 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1624 // available.
1625 // Transform (and (shl x, c2), c1)
1626 // -> (<bfinsert> x, msb, lsb)
1627 // e.g.
1628 // (and (shl x, 12), 0x00fff000)
1629 // If XLen = 32 and C2 = 12, then
1630 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1631 const unsigned Msb = XLen - Leading - 1;
1632 const unsigned Lsb = C2;
1633 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1634 return;
1635
1636 if (OneUseOrZExtW && !IsCANDI) {
1637 // (packh x0, X)
1638 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1639 SDNode *PACKH = CurDAG->getMachineNode(
1640 RISCV::PACKH, DL, VT,
1641 CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
1642 ReplaceNode(Node, PACKH);
1643 return;
1644 }
1645 // (srli (slli c2+c3), c3)
1646 SDNode *SLLI = CurDAG->getMachineNode(
1647 RISCV::SLLI, DL, VT, X,
1648 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1649 SDNode *SRLI = CurDAG->getMachineNode(
1650 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1651 CurDAG->getTargetConstant(Leading, DL, VT));
1652 ReplaceNode(Node, SRLI);
1653 return;
1654 }
1655 }
1656 }
1657
1658 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1659 // shifted mask with c2 leading zeros and c3 trailing zeros.
1660 if (!LeftShift && isShiftedMask_64(C1)) {
1661 unsigned Leading = XLen - llvm::bit_width(C1);
1662 unsigned Trailing = llvm::countr_zero(C1);
1663 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1664 !IsCANDI) {
1665 unsigned SrliOpc = RISCV::SRLI;
1666 // If the input is zexti32 we should use SRLIW.
1667 if (X.getOpcode() == ISD::AND &&
1668 isa<ConstantSDNode>(X.getOperand(1)) &&
1669 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1670 SrliOpc = RISCV::SRLIW;
1671 X = X.getOperand(0);
1672 }
1673 SDNode *SRLI = CurDAG->getMachineNode(
1674 SrliOpc, DL, VT, X,
1675 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1676 SDNode *SLLI = CurDAG->getMachineNode(
1677 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1678 CurDAG->getTargetConstant(Trailing, DL, VT));
1679 ReplaceNode(Node, SLLI);
1680 return;
1681 }
1682 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1683 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1684 OneUseOrZExtW && !IsCANDI) {
1685 SDNode *SRLIW = CurDAG->getMachineNode(
1686 RISCV::SRLIW, DL, VT, X,
1687 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1688 SDNode *SLLI = CurDAG->getMachineNode(
1689 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1690 CurDAG->getTargetConstant(Trailing, DL, VT));
1691 ReplaceNode(Node, SLLI);
1692 return;
1693 }
1694 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1695 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1696 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1697 SDNode *SRLI = CurDAG->getMachineNode(
1698 RISCV::SRLI, DL, VT, X,
1699 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1700 SDNode *SLLI_UW = CurDAG->getMachineNode(
1701 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1702 CurDAG->getTargetConstant(Trailing, DL, VT));
1703 ReplaceNode(Node, SLLI_UW);
1704 return;
1705 }
1706 }
1707
1708 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1709 // shifted mask with no leading zeros and c3 trailing zeros.
1710 if (LeftShift && isShiftedMask_64(C1)) {
1711 unsigned Leading = XLen - llvm::bit_width(C1);
1712 unsigned Trailing = llvm::countr_zero(C1);
1713 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1714 SDNode *SRLI = CurDAG->getMachineNode(
1715 RISCV::SRLI, DL, VT, X,
1716 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1717 SDNode *SLLI = CurDAG->getMachineNode(
1718 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1719 CurDAG->getTargetConstant(Trailing, DL, VT));
1720 ReplaceNode(Node, SLLI);
1721 return;
1722 }
1723 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1724 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1725 SDNode *SRLIW = CurDAG->getMachineNode(
1726 RISCV::SRLIW, DL, VT, X,
1727 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1728 SDNode *SLLI = CurDAG->getMachineNode(
1729 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1730 CurDAG->getTargetConstant(Trailing, DL, VT));
1731 ReplaceNode(Node, SLLI);
1732 return;
1733 }
1734
1735 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1736 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1737 Subtarget->hasStdExtZba()) {
1738 SDNode *SRLI = CurDAG->getMachineNode(
1739 RISCV::SRLI, DL, VT, X,
1740 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1741 SDNode *SLLI_UW = CurDAG->getMachineNode(
1742 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1743 CurDAG->getTargetConstant(Trailing, DL, VT));
1744 ReplaceNode(Node, SLLI_UW);
1745 return;
1746 }
1747 }
1748 }
1749
1750 const uint64_t C1 = N1C->getZExtValue();
1751
1752 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1753 N0.hasOneUse()) {
1754 unsigned C2 = N0.getConstantOperandVal(1);
1755 unsigned XLen = Subtarget->getXLen();
1756 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1757
1758 SDValue X = N0.getOperand(0);
1759
1760 // Prefer SRAIW + ANDI when possible.
1761 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1762 X.getOpcode() == ISD::SHL &&
1763 isa<ConstantSDNode>(X.getOperand(1)) &&
1764 X.getConstantOperandVal(1) == 32;
1765 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1766 // mask with c3 leading zeros and c2 is larger than c3.
1767 if (isMask_64(C1) && !Skip) {
1768 unsigned Leading = XLen - llvm::bit_width(C1);
1769 if (C2 > Leading) {
1770 SDNode *SRAI = CurDAG->getMachineNode(
1771 RISCV::SRAI, DL, VT, X,
1772 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1773 SDNode *SRLI = CurDAG->getMachineNode(
1774 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1775 CurDAG->getTargetConstant(Leading, DL, VT));
1776 ReplaceNode(Node, SRLI);
1777 return;
1778 }
1779 }
1780
1781 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1782 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1783 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1784 if (isShiftedMask_64(C1) && !Skip) {
1785 unsigned Leading = XLen - llvm::bit_width(C1);
1786 unsigned Trailing = llvm::countr_zero(C1);
1787 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1788 SDNode *SRAI = CurDAG->getMachineNode(
1789 RISCV::SRAI, DL, VT, N0.getOperand(0),
1790 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1791 SDNode *SRLI = CurDAG->getMachineNode(
1792 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1793 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1794 SDNode *SLLI = CurDAG->getMachineNode(
1795 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1796 CurDAG->getTargetConstant(Trailing, DL, VT));
1797 ReplaceNode(Node, SLLI);
1798 return;
1799 }
1800 }
1801 }
1802
1803 // If C1 masks off the upper bits only (but can't be formed as an
1804 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1805 // available.
1806 // Transform (and x, C1)
1807 // -> (<bfextract> x, msb, lsb)
1808 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1809 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1810 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1811 const unsigned Msb = llvm::bit_width(C1) - 1;
1812 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1813 return;
1814 }
1815
1817 return;
1818
1819 break;
1820 }
1821 case ISD::MUL: {
1822 // Special case for calculating (mul (and X, C2), C1) where the full product
1823 // fits in XLen bits. We can shift X left by the number of leading zeros in
1824 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1825 // product has XLen trailing zeros, putting it in the output of MULHU. This
1826 // can avoid materializing a constant in a register for C2.
1827
1828 // RHS should be a constant.
1829 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1830 if (!N1C || !N1C->hasOneUse())
1831 break;
1832
1833 // LHS should be an AND with constant.
1834 SDValue N0 = Node->getOperand(0);
1835 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1836 break;
1837
1839
1840 // Constant should be a mask.
1841 if (!isMask_64(C2))
1842 break;
1843
1844 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1845 // multiple users or the constant is a simm12. This prevents inserting a
1846 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1847 // make it more costly to materialize. Otherwise, using a SLLI might allow
1848 // it to be compressed.
1849 bool IsANDIOrZExt =
1850 isInt<12>(C2) ||
1851 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1852 // With XTHeadBb, we can use TH.EXTU.
1853 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1854 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1855 break;
1856 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1857 // the constant is a simm32.
1858 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1859 // With XTHeadBb, we can use TH.EXTU.
1860 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1861 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1862 break;
1863
1864 // We need to shift left the AND input and C1 by a total of XLen bits.
1865
1866 // How far left do we need to shift the AND input?
1867 unsigned XLen = Subtarget->getXLen();
1868 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1869
1870 // The constant gets shifted by the remaining amount unless that would
1871 // shift bits out.
1872 uint64_t C1 = N1C->getZExtValue();
1873 unsigned ConstantShift = XLen - LeadingZeros;
1874 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1875 break;
1876
1877 uint64_t ShiftedC1 = C1 << ConstantShift;
1878 // If this is RV32, we need to sign extend the constant.
1879 if (XLen == 32)
1880 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1881
1882 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1883 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1884 SDNode *SLLI =
1885 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1886 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1887 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1888 SDValue(SLLI, 0), SDValue(Imm, 0));
1889 ReplaceNode(Node, MULHU);
1890 return;
1891 }
1892 case ISD::SMUL_LOHI:
1893 case ISD::UMUL_LOHI:
1894 case RISCVISD::WMULSU:
1895 case RISCVISD::WADDU:
1896 case RISCVISD::WSUBU: {
1897 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1898 "Unexpected opcode");
1899
1900 unsigned Opc;
1901 switch (Node->getOpcode()) {
1902 default:
1903 llvm_unreachable("Unexpected opcode");
1904 case ISD::SMUL_LOHI:
1905 Opc = RISCV::WMUL;
1906 break;
1907 case ISD::UMUL_LOHI:
1908 Opc = RISCV::WMULU;
1909 break;
1910 case RISCVISD::WMULSU:
1911 Opc = RISCV::WMULSU;
1912 break;
1913 case RISCVISD::WADDU:
1914 Opc = RISCV::WADDU;
1915 break;
1916 case RISCVISD::WSUBU:
1917 Opc = RISCV::WSUBU;
1918 break;
1919 }
1920
1921 SDNode *Result = CurDAG->getMachineNode(
1922 Opc, DL, MVT::Untyped, Node->getOperand(0), Node->getOperand(1));
1923
1924 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(Result, 0));
1925 ReplaceUses(SDValue(Node, 0), Lo);
1926 ReplaceUses(SDValue(Node, 1), Hi);
1927 CurDAG->RemoveDeadNode(Node);
1928 return;
1929 }
1930 case RISCVISD::WSLL:
1931 case RISCVISD::WSLA: {
1932 // Custom select WSLL/WSLA for RV32P.
1933 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1934 "Unexpected opcode");
1935
1936 bool IsSigned = Node->getOpcode() == RISCVISD::WSLA;
1937
1938 SDValue ShAmt = Node->getOperand(1);
1939
1940 unsigned Opc;
1941
1942 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1943 if (ShAmtC && ShAmtC->getZExtValue() < 64) {
1944 Opc = IsSigned ? RISCV::WSLAI : RISCV::WSLLI;
1945 ShAmt = CurDAG->getTargetConstant(ShAmtC->getZExtValue(), DL, XLenVT);
1946 } else {
1947 Opc = IsSigned ? RISCV::WSLA : RISCV::WSLL;
1948 }
1949
1950 SDNode *WShift = CurDAG->getMachineNode(Opc, DL, MVT::Untyped,
1951 Node->getOperand(0), ShAmt);
1952
1953 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(WShift, 0));
1954 ReplaceUses(SDValue(Node, 0), Lo);
1955 ReplaceUses(SDValue(Node, 1), Hi);
1956 CurDAG->RemoveDeadNode(Node);
1957 return;
1958 }
1959 case ISD::LOAD: {
1960 if (tryIndexedLoad(Node))
1961 return;
1962
1963 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1964 // We match post-incrementing load here
1966 if (Load->getAddressingMode() != ISD::POST_INC)
1967 break;
1968
1969 SDValue Chain = Node->getOperand(0);
1970 SDValue Base = Node->getOperand(1);
1971 SDValue Offset = Node->getOperand(2);
1972
1973 bool Simm12 = false;
1974 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1975
1976 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1977 int ConstantVal = ConstantOffset->getSExtValue();
1978 Simm12 = isInt<12>(ConstantVal);
1979 if (Simm12)
1980 Offset = CurDAG->getSignedTargetConstant(ConstantVal, SDLoc(Offset),
1981 Offset.getValueType());
1982 }
1983
1984 unsigned Opcode = 0;
1985 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1986 case MVT::i8:
1987 if (Simm12 && SignExtend)
1988 Opcode = RISCV::CV_LB_ri_inc;
1989 else if (Simm12 && !SignExtend)
1990 Opcode = RISCV::CV_LBU_ri_inc;
1991 else if (!Simm12 && SignExtend)
1992 Opcode = RISCV::CV_LB_rr_inc;
1993 else
1994 Opcode = RISCV::CV_LBU_rr_inc;
1995 break;
1996 case MVT::i16:
1997 if (Simm12 && SignExtend)
1998 Opcode = RISCV::CV_LH_ri_inc;
1999 else if (Simm12 && !SignExtend)
2000 Opcode = RISCV::CV_LHU_ri_inc;
2001 else if (!Simm12 && SignExtend)
2002 Opcode = RISCV::CV_LH_rr_inc;
2003 else
2004 Opcode = RISCV::CV_LHU_rr_inc;
2005 break;
2006 case MVT::i32:
2007 if (Simm12)
2008 Opcode = RISCV::CV_LW_ri_inc;
2009 else
2010 Opcode = RISCV::CV_LW_rr_inc;
2011 break;
2012 default:
2013 break;
2014 }
2015 if (!Opcode)
2016 break;
2017
2018 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
2019 Chain.getSimpleValueType(), Base,
2020 Offset, Chain));
2021 return;
2022 }
2023 break;
2024 }
2025 case RISCVISD::LD_RV32: {
2026 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
2027
2029 SDValue Chain = Node->getOperand(0);
2030 SDValue Addr = Node->getOperand(1);
2032
2033 SDValue Ops[] = {Base, Offset, Chain};
2034 MachineSDNode *New = CurDAG->getMachineNode(
2035 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
2036 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2037 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2038 ReplaceUses(SDValue(Node, 0), Lo);
2039 ReplaceUses(SDValue(Node, 1), Hi);
2040 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
2041 CurDAG->RemoveDeadNode(Node);
2042 return;
2043 }
2044 case RISCVISD::SD_RV32: {
2046 SDValue Chain = Node->getOperand(0);
2047 SDValue Addr = Node->getOperand(3);
2049
2050 SDValue Lo = Node->getOperand(1);
2051 SDValue Hi = Node->getOperand(2);
2052
2053 SDValue RegPair;
2054 // Peephole to use X0_Pair for storing zero.
2056 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2057 } else {
2058 RegPair = buildGPRPair(CurDAG, DL, MVT::Untyped, Lo, Hi);
2059 }
2060
2061 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
2062 {RegPair, Base, Offset, Chain});
2063 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2064 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
2065 CurDAG->RemoveDeadNode(Node);
2066 return;
2067 }
2068 case RISCVISD::ADDD:
2069 // Try to match WMACC pattern: ADDD where one operand pair comes from a
2070 // widening multiply.
2072 return;
2073
2074 // Fall through to regular ADDD selection.
2075 [[fallthrough]];
2076 case RISCVISD::SUBD:
2077 case RISCVISD::PPAIRE_DB:
2078 case RISCVISD::WADDAU:
2079 case RISCVISD::WSUBAU:
2080 case RISCVISD::WADDA:
2081 case RISCVISD::WSUBA: {
2082 assert(!Subtarget->is64Bit() && "Unexpected opcode");
2083 assert(
2084 (Node->getOpcode() != RISCVISD::PPAIRE_DB || Subtarget->hasStdExtP()) &&
2085 "Unexpected opcode");
2086
2087 SDValue Op0Lo = Node->getOperand(0);
2088 SDValue Op0Hi = Node->getOperand(1);
2089
2090 SDValue Op0;
2091 if (isNullConstant(Op0Lo) && isNullConstant(Op0Hi)) {
2092 Op0 = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2093 } else {
2094 Op0 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op0Lo, Op0Hi);
2095 }
2096
2097 SDValue Op1Lo = Node->getOperand(2);
2098 SDValue Op1Hi = Node->getOperand(3);
2099
2100 MachineSDNode *New;
2101 if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU ||
2102 Opcode == RISCVISD::WADDA || Opcode == RISCVISD::WSUBA) {
2103 // Widening accumulate: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi
2104 // are the two 32-bit values.
2105 unsigned Opc;
2106 switch (Opcode) {
2107 default:
2108 llvm_unreachable("Unexpected opcode");
2109 case RISCVISD::WADDAU:
2110 Opc = RISCV::WADDAU;
2111 break;
2112 case RISCVISD::WSUBAU:
2113 Opc = RISCV::WSUBAU;
2114 break;
2115 case RISCVISD::WADDA:
2116 Opc = RISCV::WADDA;
2117 break;
2118 case RISCVISD::WSUBA:
2119 Opc = RISCV::WSUBA;
2120 break;
2121 }
2122 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1Lo, Op1Hi);
2123 } else {
2124 SDValue Op1 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op1Lo, Op1Hi);
2125
2126 unsigned Opc;
2127 switch (Opcode) {
2128 default:
2129 llvm_unreachable("Unexpected opcode");
2130 case RISCVISD::ADDD:
2131 Opc = RISCV::ADDD;
2132 break;
2133 case RISCVISD::SUBD:
2134 Opc = RISCV::SUBD;
2135 break;
2136 case RISCVISD::PPAIRE_DB:
2137 Opc = RISCV::PPAIRE_DB;
2138 break;
2139 }
2140 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1);
2141 }
2142
2143 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2144 ReplaceUses(SDValue(Node, 0), Lo);
2145 ReplaceUses(SDValue(Node, 1), Hi);
2146 CurDAG->RemoveDeadNode(Node);
2147 return;
2148 }
2150 unsigned IntNo = Node->getConstantOperandVal(0);
2151 switch (IntNo) {
2152 // By default we do not custom select any intrinsic.
2153 default:
2154 break;
2155 case Intrinsic::riscv_vmsgeu:
2156 case Intrinsic::riscv_vmsge: {
2157 SDValue Src1 = Node->getOperand(1);
2158 SDValue Src2 = Node->getOperand(2);
2159 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
2160 bool IsCmpConstant = false;
2161 bool IsCmpMinimum = false;
2162 // Only custom select scalar second operand.
2163 if (Src2.getValueType() != XLenVT)
2164 break;
2165 // Small constants are handled with patterns.
2166 int64_t CVal = 0;
2167 MVT Src1VT = Src1.getSimpleValueType();
2168 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2169 IsCmpConstant = true;
2170 CVal = C->getSExtValue();
2171 if (CVal >= -15 && CVal <= 16) {
2172 if (!IsUnsigned || CVal != 0)
2173 break;
2174 IsCmpMinimum = true;
2175 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2176 Src1VT.getScalarSizeInBits())
2177 .getSExtValue()) {
2178 IsCmpMinimum = true;
2179 }
2180 }
2181 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
2182 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2183 default:
2184 llvm_unreachable("Unexpected LMUL!");
2185#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2186 case RISCVVType::lmulenum: \
2187 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2188 : RISCV::PseudoVMSLT_VX_##suffix; \
2189 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
2190 : RISCV::PseudoVMSGT_VX_##suffix; \
2191 break;
2192 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2193 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2194 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2195 CASE_VMSLT_OPCODES(LMUL_1, M1)
2196 CASE_VMSLT_OPCODES(LMUL_2, M2)
2197 CASE_VMSLT_OPCODES(LMUL_4, M4)
2198 CASE_VMSLT_OPCODES(LMUL_8, M8)
2199#undef CASE_VMSLT_OPCODES
2200 }
2201 // Mask operations use the LMUL from the mask type.
2202 switch (RISCVTargetLowering::getLMUL(VT)) {
2203 default:
2204 llvm_unreachable("Unexpected LMUL!");
2205#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2206 case RISCVVType::lmulenum: \
2207 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2208 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2209 break;
2210 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2211 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2212 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2213 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2214 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2215 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2216 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2217#undef CASE_VMNAND_VMSET_OPCODES
2218 }
2219 SDValue SEW = CurDAG->getTargetConstant(
2220 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2221 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2222 SDValue VL;
2223 selectVLOp(Node->getOperand(3), VL);
2224
2225 // If vmsge(u) with minimum value, expand it to vmset.
2226 if (IsCmpMinimum) {
2228 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
2229 return;
2230 }
2231
2232 if (IsCmpConstant) {
2233 SDValue Imm =
2234 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2235
2236 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2237 {Src1, Imm, VL, SEW}));
2238 return;
2239 }
2240
2241 // Expand to
2242 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2243 SDValue Cmp = SDValue(
2244 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2245 0);
2246 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2247 {Cmp, Cmp, VL, MaskSEW}));
2248 return;
2249 }
2250 case Intrinsic::riscv_vmsgeu_mask:
2251 case Intrinsic::riscv_vmsge_mask: {
2252 SDValue Src1 = Node->getOperand(2);
2253 SDValue Src2 = Node->getOperand(3);
2254 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2255 bool IsCmpConstant = false;
2256 bool IsCmpMinimum = false;
2257 // Only custom select scalar second operand.
2258 if (Src2.getValueType() != XLenVT)
2259 break;
2260 // Small constants are handled with patterns.
2261 MVT Src1VT = Src1.getSimpleValueType();
2262 int64_t CVal = 0;
2263 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2264 IsCmpConstant = true;
2265 CVal = C->getSExtValue();
2266 if (CVal >= -15 && CVal <= 16) {
2267 if (!IsUnsigned || CVal != 0)
2268 break;
2269 IsCmpMinimum = true;
2270 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2271 Src1VT.getScalarSizeInBits())
2272 .getSExtValue()) {
2273 IsCmpMinimum = true;
2274 }
2275 }
2276 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2277 VMOROpcode, VMSGTMaskOpcode;
2278 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2279 default:
2280 llvm_unreachable("Unexpected LMUL!");
2281#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2282 case RISCVVType::lmulenum: \
2283 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2284 : RISCV::PseudoVMSLT_VX_##suffix; \
2285 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2286 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2287 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2288 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2289 break;
2290 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2291 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2292 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2293 CASE_VMSLT_OPCODES(LMUL_1, M1)
2294 CASE_VMSLT_OPCODES(LMUL_2, M2)
2295 CASE_VMSLT_OPCODES(LMUL_4, M4)
2296 CASE_VMSLT_OPCODES(LMUL_8, M8)
2297#undef CASE_VMSLT_OPCODES
2298 }
2299 // Mask operations use the LMUL from the mask type.
2300 switch (RISCVTargetLowering::getLMUL(VT)) {
2301 default:
2302 llvm_unreachable("Unexpected LMUL!");
2303#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2304 case RISCVVType::lmulenum: \
2305 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2306 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2307 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2308 break;
2309 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2310 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2311 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2316#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2317 }
2318 SDValue SEW = CurDAG->getTargetConstant(
2319 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2320 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2321 SDValue VL;
2322 selectVLOp(Node->getOperand(5), VL);
2323 SDValue MaskedOff = Node->getOperand(1);
2324 SDValue Mask = Node->getOperand(4);
2325
2326 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2327 if (IsCmpMinimum) {
2328 // We don't need vmor if the MaskedOff and the Mask are the same
2329 // value.
2330 if (Mask == MaskedOff) {
2331 ReplaceUses(Node, Mask.getNode());
2332 return;
2333 }
2335 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2336 {Mask, MaskedOff, VL, MaskSEW}));
2337 return;
2338 }
2339
2340 // If the MaskedOff value and the Mask are the same value use
2341 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2342 // This avoids needing to copy v0 to vd before starting the next sequence.
2343 if (Mask == MaskedOff) {
2344 SDValue Cmp = SDValue(
2345 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2346 0);
2347 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2348 {Mask, Cmp, VL, MaskSEW}));
2349 return;
2350 }
2351
2352 SDValue PolicyOp =
2353 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2354
2355 if (IsCmpConstant) {
2356 SDValue Imm =
2357 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2358
2359 ReplaceNode(Node, CurDAG->getMachineNode(
2360 VMSGTMaskOpcode, DL, VT,
2361 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2362 return;
2363 }
2364
2365 // Otherwise use
2366 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2367 // The result is mask undisturbed.
2368 // We use the same instructions to emulate mask agnostic behavior, because
2369 // the agnostic result can be either undisturbed or all 1.
2370 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2371 {MaskedOff, Src1, Src2, Mask,
2372 VL, SEW, PolicyOp}),
2373 0);
2374 // vmxor.mm vd, vd, v0 is used to update active value.
2375 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2376 {Cmp, Mask, VL, MaskSEW}));
2377 return;
2378 }
2379 case Intrinsic::riscv_vsetvli:
2380 case Intrinsic::riscv_vsetvlimax:
2381 return selectVSETVLI(Node);
2382 case Intrinsic::riscv_sf_vsettnt:
2383 case Intrinsic::riscv_sf_vsettm:
2384 case Intrinsic::riscv_sf_vsettk:
2385 return selectXSfmmVSET(Node);
2386 }
2387 break;
2388 }
2390 unsigned IntNo = Node->getConstantOperandVal(1);
2391 switch (IntNo) {
2392 // By default we do not custom select any intrinsic.
2393 default:
2394 break;
2395 case Intrinsic::riscv_vlseg2:
2396 case Intrinsic::riscv_vlseg3:
2397 case Intrinsic::riscv_vlseg4:
2398 case Intrinsic::riscv_vlseg5:
2399 case Intrinsic::riscv_vlseg6:
2400 case Intrinsic::riscv_vlseg7:
2401 case Intrinsic::riscv_vlseg8: {
2402 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2403 /*IsStrided*/ false);
2404 return;
2405 }
2406 case Intrinsic::riscv_vlseg2_mask:
2407 case Intrinsic::riscv_vlseg3_mask:
2408 case Intrinsic::riscv_vlseg4_mask:
2409 case Intrinsic::riscv_vlseg5_mask:
2410 case Intrinsic::riscv_vlseg6_mask:
2411 case Intrinsic::riscv_vlseg7_mask:
2412 case Intrinsic::riscv_vlseg8_mask: {
2413 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2414 /*IsStrided*/ false);
2415 return;
2416 }
2417 case Intrinsic::riscv_vlsseg2:
2418 case Intrinsic::riscv_vlsseg3:
2419 case Intrinsic::riscv_vlsseg4:
2420 case Intrinsic::riscv_vlsseg5:
2421 case Intrinsic::riscv_vlsseg6:
2422 case Intrinsic::riscv_vlsseg7:
2423 case Intrinsic::riscv_vlsseg8: {
2424 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2425 /*IsStrided*/ true);
2426 return;
2427 }
2428 case Intrinsic::riscv_vlsseg2_mask:
2429 case Intrinsic::riscv_vlsseg3_mask:
2430 case Intrinsic::riscv_vlsseg4_mask:
2431 case Intrinsic::riscv_vlsseg5_mask:
2432 case Intrinsic::riscv_vlsseg6_mask:
2433 case Intrinsic::riscv_vlsseg7_mask:
2434 case Intrinsic::riscv_vlsseg8_mask: {
2435 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2436 /*IsStrided*/ true);
2437 return;
2438 }
2439 case Intrinsic::riscv_vloxseg2:
2440 case Intrinsic::riscv_vloxseg3:
2441 case Intrinsic::riscv_vloxseg4:
2442 case Intrinsic::riscv_vloxseg5:
2443 case Intrinsic::riscv_vloxseg6:
2444 case Intrinsic::riscv_vloxseg7:
2445 case Intrinsic::riscv_vloxseg8:
2446 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2447 /*IsOrdered*/ true);
2448 return;
2449 case Intrinsic::riscv_vluxseg2:
2450 case Intrinsic::riscv_vluxseg3:
2451 case Intrinsic::riscv_vluxseg4:
2452 case Intrinsic::riscv_vluxseg5:
2453 case Intrinsic::riscv_vluxseg6:
2454 case Intrinsic::riscv_vluxseg7:
2455 case Intrinsic::riscv_vluxseg8:
2456 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2457 /*IsOrdered*/ false);
2458 return;
2459 case Intrinsic::riscv_vloxseg2_mask:
2460 case Intrinsic::riscv_vloxseg3_mask:
2461 case Intrinsic::riscv_vloxseg4_mask:
2462 case Intrinsic::riscv_vloxseg5_mask:
2463 case Intrinsic::riscv_vloxseg6_mask:
2464 case Intrinsic::riscv_vloxseg7_mask:
2465 case Intrinsic::riscv_vloxseg8_mask:
2466 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2467 /*IsOrdered*/ true);
2468 return;
2469 case Intrinsic::riscv_vluxseg2_mask:
2470 case Intrinsic::riscv_vluxseg3_mask:
2471 case Intrinsic::riscv_vluxseg4_mask:
2472 case Intrinsic::riscv_vluxseg5_mask:
2473 case Intrinsic::riscv_vluxseg6_mask:
2474 case Intrinsic::riscv_vluxseg7_mask:
2475 case Intrinsic::riscv_vluxseg8_mask:
2476 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2477 /*IsOrdered*/ false);
2478 return;
2479 case Intrinsic::riscv_vlseg8ff:
2480 case Intrinsic::riscv_vlseg7ff:
2481 case Intrinsic::riscv_vlseg6ff:
2482 case Intrinsic::riscv_vlseg5ff:
2483 case Intrinsic::riscv_vlseg4ff:
2484 case Intrinsic::riscv_vlseg3ff:
2485 case Intrinsic::riscv_vlseg2ff: {
2486 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2487 return;
2488 }
2489 case Intrinsic::riscv_vlseg8ff_mask:
2490 case Intrinsic::riscv_vlseg7ff_mask:
2491 case Intrinsic::riscv_vlseg6ff_mask:
2492 case Intrinsic::riscv_vlseg5ff_mask:
2493 case Intrinsic::riscv_vlseg4ff_mask:
2494 case Intrinsic::riscv_vlseg3ff_mask:
2495 case Intrinsic::riscv_vlseg2ff_mask: {
2496 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2497 return;
2498 }
2499 case Intrinsic::riscv_vloxei:
2500 case Intrinsic::riscv_vloxei_mask:
2501 case Intrinsic::riscv_vluxei:
2502 case Intrinsic::riscv_vluxei_mask: {
2503 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2504 IntNo == Intrinsic::riscv_vluxei_mask;
2505 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2506 IntNo == Intrinsic::riscv_vloxei_mask;
2507
2508 MVT VT = Node->getSimpleValueType(0);
2509 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2510
2511 unsigned CurOp = 2;
2512 SmallVector<SDValue, 8> Operands;
2513 Operands.push_back(Node->getOperand(CurOp++));
2514
2515 MVT IndexVT;
2516 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2517 /*IsStridedOrIndexed*/ true, Operands,
2518 /*IsLoad=*/true, &IndexVT);
2519
2521 "Element count mismatch");
2522
2525 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2526 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2527 reportFatalUsageError("The V extension does not support EEW=64 for "
2528 "index values when XLEN=32");
2529 }
2530 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2531 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2532 static_cast<unsigned>(IndexLMUL));
2533 MachineSDNode *Load =
2534 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2535
2536 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2537
2538 ReplaceNode(Node, Load);
2539 return;
2540 }
2541 case Intrinsic::riscv_vlm:
2542 case Intrinsic::riscv_vle:
2543 case Intrinsic::riscv_vle_mask:
2544 case Intrinsic::riscv_vlse:
2545 case Intrinsic::riscv_vlse_mask: {
2546 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2547 IntNo == Intrinsic::riscv_vlse_mask;
2548 bool IsStrided =
2549 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2550
2551 MVT VT = Node->getSimpleValueType(0);
2552 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2553
2554 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2555 // operand at the IR level. In pseudos, it has both a policy and a
2556 // passthru operand. The passthru operand is needed to track the
2557 // "tail undefined" state, and the policy is there just for
2558 // consistency - it will always be "don't care" for the
2559 // unmasked form.
2560 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2561 unsigned CurOp = 2;
2562 SmallVector<SDValue, 8> Operands;
2563 if (HasPassthruOperand)
2564 Operands.push_back(Node->getOperand(CurOp++));
2565 else {
2566 // We eagerly lower to implicit_def (instead of undef), as we
2567 // otherwise fail to select nodes such as: nxv1i1 = undef
2568 SDNode *Passthru =
2569 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2570 Operands.push_back(SDValue(Passthru, 0));
2571 }
2572 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2573 Operands, /*IsLoad=*/true);
2574
2576 const RISCV::VLEPseudo *P =
2577 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2578 static_cast<unsigned>(LMUL));
2579 MachineSDNode *Load =
2580 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2581
2582 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2583
2584 ReplaceNode(Node, Load);
2585 return;
2586 }
2587 case Intrinsic::riscv_vleff:
2588 case Intrinsic::riscv_vleff_mask: {
2589 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2590
2591 MVT VT = Node->getSimpleValueType(0);
2592 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2593
2594 unsigned CurOp = 2;
2595 SmallVector<SDValue, 7> Operands;
2596 Operands.push_back(Node->getOperand(CurOp++));
2597 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2598 /*IsStridedOrIndexed*/ false, Operands,
2599 /*IsLoad=*/true);
2600
2602 const RISCV::VLEPseudo *P =
2603 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2604 Log2SEW, static_cast<unsigned>(LMUL));
2605 MachineSDNode *Load = CurDAG->getMachineNode(
2606 P->Pseudo, DL, Node->getVTList(), Operands);
2607 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2608
2609 ReplaceNode(Node, Load);
2610 return;
2611 }
2612 case Intrinsic::riscv_nds_vln:
2613 case Intrinsic::riscv_nds_vln_mask:
2614 case Intrinsic::riscv_nds_vlnu:
2615 case Intrinsic::riscv_nds_vlnu_mask: {
2616 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2617 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2618 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2619 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2620
2621 MVT VT = Node->getSimpleValueType(0);
2622 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2623 unsigned CurOp = 2;
2624 SmallVector<SDValue, 8> Operands;
2625
2626 Operands.push_back(Node->getOperand(CurOp++));
2627 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2628 /*IsStridedOrIndexed=*/false, Operands,
2629 /*IsLoad=*/true);
2630
2632 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2633 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2634 MachineSDNode *Load =
2635 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2636
2637 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2638 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2639
2640 ReplaceNode(Node, Load);
2641 return;
2642 }
2643 }
2644 break;
2645 }
2646 case ISD::INTRINSIC_VOID: {
2647 unsigned IntNo = Node->getConstantOperandVal(1);
2648 switch (IntNo) {
2649 case Intrinsic::riscv_vsseg2:
2650 case Intrinsic::riscv_vsseg3:
2651 case Intrinsic::riscv_vsseg4:
2652 case Intrinsic::riscv_vsseg5:
2653 case Intrinsic::riscv_vsseg6:
2654 case Intrinsic::riscv_vsseg7:
2655 case Intrinsic::riscv_vsseg8: {
2656 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2657 /*IsStrided*/ false);
2658 return;
2659 }
2660 case Intrinsic::riscv_vsseg2_mask:
2661 case Intrinsic::riscv_vsseg3_mask:
2662 case Intrinsic::riscv_vsseg4_mask:
2663 case Intrinsic::riscv_vsseg5_mask:
2664 case Intrinsic::riscv_vsseg6_mask:
2665 case Intrinsic::riscv_vsseg7_mask:
2666 case Intrinsic::riscv_vsseg8_mask: {
2667 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2668 /*IsStrided*/ false);
2669 return;
2670 }
2671 case Intrinsic::riscv_vssseg2:
2672 case Intrinsic::riscv_vssseg3:
2673 case Intrinsic::riscv_vssseg4:
2674 case Intrinsic::riscv_vssseg5:
2675 case Intrinsic::riscv_vssseg6:
2676 case Intrinsic::riscv_vssseg7:
2677 case Intrinsic::riscv_vssseg8: {
2678 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2679 /*IsStrided*/ true);
2680 return;
2681 }
2682 case Intrinsic::riscv_vssseg2_mask:
2683 case Intrinsic::riscv_vssseg3_mask:
2684 case Intrinsic::riscv_vssseg4_mask:
2685 case Intrinsic::riscv_vssseg5_mask:
2686 case Intrinsic::riscv_vssseg6_mask:
2687 case Intrinsic::riscv_vssseg7_mask:
2688 case Intrinsic::riscv_vssseg8_mask: {
2689 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2690 /*IsStrided*/ true);
2691 return;
2692 }
2693 case Intrinsic::riscv_vsoxseg2:
2694 case Intrinsic::riscv_vsoxseg3:
2695 case Intrinsic::riscv_vsoxseg4:
2696 case Intrinsic::riscv_vsoxseg5:
2697 case Intrinsic::riscv_vsoxseg6:
2698 case Intrinsic::riscv_vsoxseg7:
2699 case Intrinsic::riscv_vsoxseg8:
2700 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2701 /*IsOrdered*/ true);
2702 return;
2703 case Intrinsic::riscv_vsuxseg2:
2704 case Intrinsic::riscv_vsuxseg3:
2705 case Intrinsic::riscv_vsuxseg4:
2706 case Intrinsic::riscv_vsuxseg5:
2707 case Intrinsic::riscv_vsuxseg6:
2708 case Intrinsic::riscv_vsuxseg7:
2709 case Intrinsic::riscv_vsuxseg8:
2710 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2711 /*IsOrdered*/ false);
2712 return;
2713 case Intrinsic::riscv_vsoxseg2_mask:
2714 case Intrinsic::riscv_vsoxseg3_mask:
2715 case Intrinsic::riscv_vsoxseg4_mask:
2716 case Intrinsic::riscv_vsoxseg5_mask:
2717 case Intrinsic::riscv_vsoxseg6_mask:
2718 case Intrinsic::riscv_vsoxseg7_mask:
2719 case Intrinsic::riscv_vsoxseg8_mask:
2720 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2721 /*IsOrdered*/ true);
2722 return;
2723 case Intrinsic::riscv_vsuxseg2_mask:
2724 case Intrinsic::riscv_vsuxseg3_mask:
2725 case Intrinsic::riscv_vsuxseg4_mask:
2726 case Intrinsic::riscv_vsuxseg5_mask:
2727 case Intrinsic::riscv_vsuxseg6_mask:
2728 case Intrinsic::riscv_vsuxseg7_mask:
2729 case Intrinsic::riscv_vsuxseg8_mask:
2730 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2731 /*IsOrdered*/ false);
2732 return;
2733 case Intrinsic::riscv_vsoxei:
2734 case Intrinsic::riscv_vsoxei_mask:
2735 case Intrinsic::riscv_vsuxei:
2736 case Intrinsic::riscv_vsuxei_mask: {
2737 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2738 IntNo == Intrinsic::riscv_vsuxei_mask;
2739 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2740 IntNo == Intrinsic::riscv_vsoxei_mask;
2741
2742 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2743 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2744
2745 unsigned CurOp = 2;
2746 SmallVector<SDValue, 8> Operands;
2747 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2748
2749 MVT IndexVT;
2750 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2751 /*IsStridedOrIndexed*/ true, Operands,
2752 /*IsLoad=*/false, &IndexVT);
2753
2755 "Element count mismatch");
2756
2759 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2760 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2761 reportFatalUsageError("The V extension does not support EEW=64 for "
2762 "index values when XLEN=32");
2763 }
2764 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2765 IsMasked, IsOrdered, IndexLog2EEW,
2766 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2767 MachineSDNode *Store =
2768 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2769
2770 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2771
2772 ReplaceNode(Node, Store);
2773 return;
2774 }
2775 case Intrinsic::riscv_vsm:
2776 case Intrinsic::riscv_vse:
2777 case Intrinsic::riscv_vse_mask:
2778 case Intrinsic::riscv_vsse:
2779 case Intrinsic::riscv_vsse_mask: {
2780 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2781 IntNo == Intrinsic::riscv_vsse_mask;
2782 bool IsStrided =
2783 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2784
2785 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2786 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2787
2788 unsigned CurOp = 2;
2789 SmallVector<SDValue, 8> Operands;
2790 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2791
2792 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2793 Operands);
2794
2796 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2797 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2798 MachineSDNode *Store =
2799 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2800 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2801
2802 ReplaceNode(Node, Store);
2803 return;
2804 }
2805 case Intrinsic::riscv_sf_vc_x_se:
2806 case Intrinsic::riscv_sf_vc_i_se:
2808 return;
2809 case Intrinsic::riscv_sf_vlte8:
2810 case Intrinsic::riscv_sf_vlte16:
2811 case Intrinsic::riscv_sf_vlte32:
2812 case Intrinsic::riscv_sf_vlte64: {
2813 unsigned Log2SEW;
2814 unsigned PseudoInst;
2815 switch (IntNo) {
2816 case Intrinsic::riscv_sf_vlte8:
2817 PseudoInst = RISCV::PseudoSF_VLTE8;
2818 Log2SEW = 3;
2819 break;
2820 case Intrinsic::riscv_sf_vlte16:
2821 PseudoInst = RISCV::PseudoSF_VLTE16;
2822 Log2SEW = 4;
2823 break;
2824 case Intrinsic::riscv_sf_vlte32:
2825 PseudoInst = RISCV::PseudoSF_VLTE32;
2826 Log2SEW = 5;
2827 break;
2828 case Intrinsic::riscv_sf_vlte64:
2829 PseudoInst = RISCV::PseudoSF_VLTE64;
2830 Log2SEW = 6;
2831 break;
2832 }
2833
2834 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2835 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2836 SDValue Operands[] = {Node->getOperand(2),
2837 Node->getOperand(3),
2838 Node->getOperand(4),
2839 SEWOp,
2840 TWidenOp,
2841 Node->getOperand(0)};
2842
2843 MachineSDNode *TileLoad =
2844 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2845 CurDAG->setNodeMemRefs(TileLoad,
2846 {cast<MemSDNode>(Node)->getMemOperand()});
2847
2848 ReplaceNode(Node, TileLoad);
2849 return;
2850 }
2851 case Intrinsic::riscv_sf_mm_s_s:
2852 case Intrinsic::riscv_sf_mm_s_u:
2853 case Intrinsic::riscv_sf_mm_u_s:
2854 case Intrinsic::riscv_sf_mm_u_u:
2855 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2856 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2857 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2858 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2859 case Intrinsic::riscv_sf_mm_f_f: {
2860 bool HasFRM = false;
2861 unsigned PseudoInst;
2862 switch (IntNo) {
2863 case Intrinsic::riscv_sf_mm_s_s:
2864 PseudoInst = RISCV::PseudoSF_MM_S_S;
2865 break;
2866 case Intrinsic::riscv_sf_mm_s_u:
2867 PseudoInst = RISCV::PseudoSF_MM_S_U;
2868 break;
2869 case Intrinsic::riscv_sf_mm_u_s:
2870 PseudoInst = RISCV::PseudoSF_MM_U_S;
2871 break;
2872 case Intrinsic::riscv_sf_mm_u_u:
2873 PseudoInst = RISCV::PseudoSF_MM_U_U;
2874 break;
2875 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2876 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2877 HasFRM = true;
2878 break;
2879 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2880 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2881 HasFRM = true;
2882 break;
2883 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2884 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2885 HasFRM = true;
2886 break;
2887 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2888 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2889 HasFRM = true;
2890 break;
2891 case Intrinsic::riscv_sf_mm_f_f:
2892 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2893 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2894 else
2895 PseudoInst = RISCV::PseudoSF_MM_F_F;
2896 HasFRM = true;
2897 break;
2898 }
2899 uint64_t TileNum = Node->getConstantOperandVal(2);
2900 SDValue Op1 = Node->getOperand(3);
2901 SDValue Op2 = Node->getOperand(4);
2902 MVT VT = Op1->getSimpleValueType(0);
2903 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2904 SDValue TmOp = Node->getOperand(5);
2905 SDValue TnOp = Node->getOperand(6);
2906 SDValue TkOp = Node->getOperand(7);
2907 SDValue TWidenOp = Node->getOperand(8);
2908 SDValue Chain = Node->getOperand(0);
2909
2910 // sf.mm.f.f with sew=32, twiden=2 is invalid
2911 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2912 TWidenOp->getAsZExtVal() == 2)
2913 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2914
2915 SmallVector<SDValue, 10> Operands(
2916 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2917 if (HasFRM)
2918 Operands.push_back(
2919 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2920 Operands.append({TmOp, TnOp, TkOp,
2921 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2922 Chain});
2923
2924 auto *NewNode =
2925 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2926
2927 ReplaceNode(Node, NewNode);
2928 return;
2929 }
2930 case Intrinsic::riscv_sf_vtzero_t: {
2931 uint64_t TileNum = Node->getConstantOperandVal(2);
2932 SDValue Tm = Node->getOperand(3);
2933 SDValue Tn = Node->getOperand(4);
2934 SDValue Log2SEW = Node->getOperand(5);
2935 SDValue TWiden = Node->getOperand(6);
2936 SDValue Chain = Node->getOperand(0);
2937 auto *NewNode = CurDAG->getMachineNode(
2938 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2939 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2940 TWiden, Chain});
2941
2942 ReplaceNode(Node, NewNode);
2943 return;
2944 }
2945 }
2946 break;
2947 }
2948 case ISD::BITCAST: {
2949 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2950 // Just drop bitcasts between vectors if both are fixed or both are
2951 // scalable.
2952 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2953 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2954 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2955 CurDAG->RemoveDeadNode(Node);
2956 return;
2957 }
2958 if (Subtarget->hasStdExtP()) {
2959 bool Is32BitCast =
2960 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2961 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2962 bool Is64BitCast =
2963 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2964 SrcVT == MVT::v2i32)) ||
2965 (SrcVT == MVT::i64 &&
2966 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2967 if (Is32BitCast || Is64BitCast) {
2968 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2969 CurDAG->RemoveDeadNode(Node);
2970 return;
2971 }
2972 }
2973 break;
2974 }
2975 case ISD::SPLAT_VECTOR: {
2976 if (!Subtarget->hasStdExtP())
2977 break;
2978 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Node->getOperand(0))) {
2979 bool IsDoubleWide = Subtarget->isPExtPackedDoubleType(VT);
2980
2981 if (ConstNode->isZero()) {
2982 MCPhysReg X0Reg = IsDoubleWide ? RISCV::X0_Pair : RISCV::X0;
2983 SDValue New =
2984 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, X0Reg, VT);
2985 ReplaceNode(Node, New.getNode());
2986 return;
2987 }
2988
2989 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2990 APInt Val = ConstNode->getAPIntValue().trunc(EltSize);
2991
2992 // Use LI for all ones since it can be compressed to c.li.
2993 if (Val.isAllOnes() && !IsDoubleWide) {
2994 SDNode *NewNode = CurDAG->getMachineNode(
2995 RISCV::ADDI, DL, VT, CurDAG->getRegister(RISCV::X0, VT),
2996 CurDAG->getAllOnesConstant(DL, XLenVT, /*IsTarget=*/true));
2997 ReplaceNode(Node, NewNode);
2998 return;
2999 }
3000
3001 // Find the smallest splat.
3002 if (Val.getBitWidth() > 16 && Val.isSplat(16))
3003 Val = Val.trunc(16);
3004 if (Val.getBitWidth() > 8 && Val.isSplat(8))
3005 Val = Val.trunc(8);
3006
3007 EltSize = Val.getBitWidth();
3008 int64_t Imm = Val.getSExtValue();
3009
3010 unsigned Opc = 0;
3011 if (EltSize == 8) {
3012 Opc = IsDoubleWide ? RISCV::PLI_DB : RISCV::PLI_B;
3013 } else if (EltSize == 16 && isInt<10>(Imm)) {
3014 Opc = IsDoubleWide ? RISCV::PLI_DH : RISCV::PLI_H;
3015 } else if (!IsDoubleWide && EltSize == 32 && isInt<10>(Imm)) {
3016 Opc = RISCV::PLI_W;
3017 } else if (EltSize == 16 && isShiftedInt<10, 6>(Imm)) {
3018 Opc = IsDoubleWide ? RISCV::PLUI_DH : RISCV::PLUI_H;
3019 Imm = Imm >> 6;
3020 } else if (!IsDoubleWide && EltSize == 32 && isShiftedInt<10, 22>(Imm)) {
3021 Opc = RISCV::PLUI_W;
3022 Imm = Imm >> 22;
3023 }
3024
3025 if (Opc) {
3026 SDNode *NewNode = CurDAG->getMachineNode(
3027 Opc, DL, VT, CurDAG->getSignedTargetConstant(Imm, DL, XLenVT));
3028 ReplaceNode(Node, NewNode);
3029 return;
3030 }
3031 }
3032
3033 break;
3034 }
3036 if (Subtarget->hasStdExtP()) {
3037 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
3038 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
3039 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
3040 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3041 CurDAG->RemoveDeadNode(Node);
3042 return;
3043 }
3044 }
3045 break;
3047 case RISCVISD::TUPLE_INSERT: {
3048 SDValue V = Node->getOperand(0);
3049 SDValue SubV = Node->getOperand(1);
3050 SDLoc DL(SubV);
3051 auto Idx = Node->getConstantOperandVal(2);
3052 MVT SubVecVT = SubV.getSimpleValueType();
3053
3054 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3055 MVT SubVecContainerVT = SubVecVT;
3056 // Establish the correct scalable-vector types for any fixed-length type.
3057 if (SubVecVT.isFixedLengthVector()) {
3058 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
3060 [[maybe_unused]] bool ExactlyVecRegSized =
3061 Subtarget->expandVScale(SubVecVT.getSizeInBits())
3062 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
3063 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
3064 .getKnownMinValue()));
3065 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
3066 }
3067 MVT ContainerVT = VT;
3068 if (VT.isFixedLengthVector())
3069 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
3070
3071 const auto *TRI = Subtarget->getRegisterInfo();
3072 unsigned SubRegIdx;
3073 std::tie(SubRegIdx, Idx) =
3075 ContainerVT, SubVecContainerVT, Idx, TRI);
3076
3077 // If the Idx hasn't been completely eliminated then this is a subvector
3078 // insert which doesn't naturally align to a vector register. These must
3079 // be handled using instructions to manipulate the vector registers.
3080 if (Idx != 0)
3081 break;
3082
3083 RISCVVType::VLMUL SubVecLMUL =
3084 RISCVTargetLowering::getLMUL(SubVecContainerVT);
3085 [[maybe_unused]] bool IsSubVecPartReg =
3086 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
3087 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
3088 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
3089 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
3090 V.isUndef()) &&
3091 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
3092 "the subvector is smaller than a full-sized register");
3093
3094 // If we haven't set a SubRegIdx, then we must be going between
3095 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
3096 if (SubRegIdx == RISCV::NoSubRegister) {
3097 unsigned InRegClassID =
3100 InRegClassID &&
3101 "Unexpected subvector extraction");
3102 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3103 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3104 DL, VT, SubV, RC);
3105 ReplaceNode(Node, NewNode);
3106 return;
3107 }
3108
3109 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
3110 ReplaceNode(Node, Insert.getNode());
3111 return;
3112 }
3114 case RISCVISD::TUPLE_EXTRACT: {
3115 SDValue V = Node->getOperand(0);
3116 auto Idx = Node->getConstantOperandVal(1);
3117 MVT InVT = V.getSimpleValueType();
3118
3119 // Handle P-extension extract_subvector for v2i16 from v4i16 and v4i8 from
3120 // v8i8
3121 if (Subtarget->hasStdExtP() && !Subtarget->is64Bit() &&
3122 ((InVT == MVT::v4i16 && VT == MVT::v2i16) ||
3123 (InVT == MVT::v8i8 && VT == MVT::v4i8))) {
3124 unsigned NumElts = VT.getVectorNumElements();
3125 if (Idx != 0 && Idx != NumElts)
3126 break;
3127
3128 unsigned SubRegIdx = Idx == 0 ? RISCV::sub_gpr_even : RISCV::sub_gpr_odd;
3129 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3130 ReplaceNode(Node, Extract.getNode());
3131 return;
3132 }
3133
3134 SDLoc DL(V);
3135
3136 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3137 MVT SubVecContainerVT = VT;
3138 // Establish the correct scalable-vector types for any fixed-length type.
3139 if (VT.isFixedLengthVector()) {
3140 assert(Idx == 0);
3141 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
3142 }
3143 if (InVT.isFixedLengthVector())
3144 InVT = TLI.getContainerForFixedLengthVector(InVT);
3145
3146 const auto *TRI = Subtarget->getRegisterInfo();
3147 unsigned SubRegIdx;
3148 std::tie(SubRegIdx, Idx) =
3150 InVT, SubVecContainerVT, Idx, TRI);
3151
3152 // If the Idx hasn't been completely eliminated then this is a subvector
3153 // extract which doesn't naturally align to a vector register. These must
3154 // be handled using instructions to manipulate the vector registers.
3155 if (Idx != 0)
3156 break;
3157
3158 // If we haven't set a SubRegIdx, then we must be going between
3159 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
3160 if (SubRegIdx == RISCV::NoSubRegister) {
3161 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
3163 InRegClassID &&
3164 "Unexpected subvector extraction");
3165 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3166 SDNode *NewNode =
3167 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3168 ReplaceNode(Node, NewNode);
3169 return;
3170 }
3171
3172 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3173 ReplaceNode(Node, Extract.getNode());
3174 return;
3175 }
3176 case RISCVISD::VMV_S_X_VL:
3177 case RISCVISD::VFMV_S_F_VL:
3178 case RISCVISD::VMV_V_X_VL:
3179 case RISCVISD::VFMV_V_F_VL: {
3180 // Try to match splat of a scalar load to a strided load with stride of x0.
3181 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3182 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3183 if (!Node->getOperand(0).isUndef())
3184 break;
3185 SDValue Src = Node->getOperand(1);
3186 auto *Ld = dyn_cast<LoadSDNode>(Src);
3187 // Can't fold load update node because the second
3188 // output is used so that load update node can't be removed.
3189 if (!Ld || Ld->isIndexed())
3190 break;
3191 EVT MemVT = Ld->getMemoryVT();
3192 // The memory VT should be the same size as the element type.
3193 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3194 break;
3195 if (!IsProfitableToFold(Src, Node, Node) ||
3196 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
3197 break;
3198
3199 SDValue VL;
3200 if (IsScalarMove) {
3201 // We could deal with more VL if we update the VSETVLI insert pass to
3202 // avoid introducing more VSETVLI.
3203 if (!isOneConstant(Node->getOperand(2)))
3204 break;
3205 selectVLOp(Node->getOperand(2), VL);
3206 } else
3207 selectVLOp(Node->getOperand(2), VL);
3208
3209 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
3210 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
3211
3212 // If VL=1, then we don't need to do a strided load and can just do a
3213 // regular load.
3214 bool IsStrided = !isOneConstant(VL);
3215
3216 // Only do a strided load if we have optimized zero-stride vector load.
3217 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3218 break;
3219
3220 SmallVector<SDValue> Operands = {
3221 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
3222 Ld->getBasePtr()};
3223 if (IsStrided)
3224 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
3226 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
3227 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
3228
3230 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3231 /*IsMasked*/ false, IsStrided, /*FF*/ false,
3232 Log2SEW, static_cast<unsigned>(LMUL));
3233 MachineSDNode *Load =
3234 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
3235 // Update the chain.
3236 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
3237 // Record the mem-refs
3238 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
3239 // Replace the splat with the vlse.
3240 ReplaceNode(Node, Load);
3241 return;
3242 }
3243 case RISCVISD::LPAD_CALL:
3244 case RISCVISD::LPAD_CALL_INDIRECT: {
3245 bool IsIndirect = Opcode == RISCVISD::LPAD_CALL_INDIRECT;
3246 unsigned PseudoOpc = IsIndirect ? RISCV::PseudoCALLIndirectLpadAlign
3247 : RISCV::PseudoCALLLpadAlign;
3248
3249 uint32_t LpadLabel = 0;
3250 if (PreferredLandingPadLabel.getNumOccurrences() > 0) {
3252 report_fatal_error("riscv-landing-pad-label=<val>, <val> needs to fit "
3253 "in unsigned 20-bits");
3254 LpadLabel = PreferredLandingPadLabel;
3255 }
3256
3258 Ops.push_back(Node->getOperand(1));
3259 Ops.push_back(CurDAG->getTargetConstant(LpadLabel, DL, XLenVT));
3260 Ops.push_back(Node->getOperand(0));
3261 if (Node->getGluedNode())
3262 Ops.push_back(Node->getOperand(Node->getNumOperands() - 1));
3263
3265 CurDAG->getMachineNode(PseudoOpc, DL, Node->getVTList(), Ops));
3266 return;
3267 }
3268 case ISD::PREFETCH:
3269 // MIPS's prefetch instruction already encodes the hint within the
3270 // instruction itself, so no extra NTL hint is needed.
3271 if (Subtarget->hasVendorXMIPSCBOP())
3272 break;
3273
3274 unsigned Locality = Node->getConstantOperandVal(3);
3275 if (Locality > 2)
3276 break;
3277
3278 auto *LoadStoreMem = cast<MemSDNode>(Node);
3279 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3281
3282 int NontemporalLevel = 0;
3283 switch (Locality) {
3284 case 0:
3285 NontemporalLevel = 3; // NTL.ALL
3286 break;
3287 case 1:
3288 NontemporalLevel = 1; // NTL.PALL
3289 break;
3290 case 2:
3291 NontemporalLevel = 0; // NTL.P1
3292 break;
3293 default:
3294 llvm_unreachable("unexpected locality value.");
3295 }
3296
3297 if (NontemporalLevel & 0b1)
3299 if (NontemporalLevel & 0b10)
3301 break;
3302 }
3303
3304 // Select the default instruction.
3305 SelectCode(Node);
3306}
3307
3309 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3310 std::vector<SDValue> &OutOps) {
3311 // Always produce a register and immediate operand, as expected by
3312 // RISCVAsmPrinter::PrintAsmMemoryOperand.
3313 switch (ConstraintID) {
3316 SDValue Op0, Op1;
3317 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
3318 assert(Found && "SelectAddrRegImm should always succeed");
3319 OutOps.push_back(Op0);
3320 OutOps.push_back(Op1);
3321 return false;
3322 }
3324 OutOps.push_back(Op);
3325 OutOps.push_back(
3326 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
3327 return false;
3328 default:
3329 report_fatal_error("Unexpected asm memory constraint " +
3330 InlineAsm::getMemConstraintName(ConstraintID));
3331 }
3332
3333 return true;
3334}
3335
3337 SDValue &Offset) {
3338 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3339 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
3340 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
3341 return true;
3342 }
3343
3344 return false;
3345}
3346
3347// Fold constant addresses.
3348static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3349 const MVT VT, const RISCVSubtarget *Subtarget,
3351 bool IsPrefetch = false) {
3352 if (!isa<ConstantSDNode>(Addr))
3353 return false;
3354
3355 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3356
3357 // If the constant is a simm12, we can fold the whole constant and use X0 as
3358 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3359 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3360 int64_t Lo12 = SignExtend64<12>(CVal);
3361 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3362 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3363 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3364 return false;
3365 if (Hi) {
3366 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3367 Base = SDValue(
3368 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3369 CurDAG->getTargetConstant(Hi20, DL, VT)),
3370 0);
3371 } else {
3372 Base = CurDAG->getRegister(RISCV::X0, VT);
3373 }
3374 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3375 return true;
3376 }
3377
3378 // Ask how constant materialization would handle this constant.
3379 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3380
3381 // If the last instruction would be an ADDI, we can fold its immediate and
3382 // emit the rest of the sequence as the base.
3383 if (Seq.back().getOpcode() != RISCV::ADDI)
3384 return false;
3385 Lo12 = Seq.back().getImm();
3386 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3387 return false;
3388
3389 // Drop the last instruction.
3390 Seq.pop_back();
3391 assert(!Seq.empty() && "Expected more instructions in sequence");
3392
3393 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3394 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3395 return true;
3396}
3397
3398// Is this ADD instruction only used as the base pointer of scalar loads and
3399// stores?
3401 for (auto *User : Add->users()) {
3402 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3403 User->getOpcode() != RISCVISD::LD_RV32 &&
3404 User->getOpcode() != RISCVISD::SD_RV32 &&
3405 User->getOpcode() != ISD::ATOMIC_LOAD &&
3406 User->getOpcode() != ISD::ATOMIC_STORE)
3407 return false;
3408 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3409 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3410 VT != MVT::f64)
3411 return false;
3412 // Don't allow stores of the value. It must be used as the address.
3413 if (User->getOpcode() == ISD::STORE &&
3414 cast<StoreSDNode>(User)->getValue() == Add)
3415 return false;
3416 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3417 cast<AtomicSDNode>(User)->getVal() == Add)
3418 return false;
3419 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3420 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3421 return false;
3422 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3423 return false;
3424 }
3425
3426 return true;
3427}
3428
3430 switch (User->getOpcode()) {
3431 default:
3432 return false;
3433 case ISD::LOAD:
3434 case RISCVISD::LD_RV32:
3435 case ISD::ATOMIC_LOAD:
3436 break;
3437 case ISD::STORE:
3438 // Don't allow stores of Add. It must only be used as the address.
3440 return false;
3441 break;
3442 case RISCVISD::SD_RV32:
3443 // Don't allow stores of Add. It must only be used as the address.
3444 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3445 return false;
3446 break;
3447 case ISD::ATOMIC_STORE:
3448 // Don't allow stores of Add. It must only be used as the address.
3449 if (cast<AtomicSDNode>(User)->getVal() == Add)
3450 return false;
3451 break;
3452 }
3453
3454 return true;
3455}
3456
3457// To prevent SelectAddrRegImm from folding offsets that conflict with the
3458// fusion of PseudoMovAddr, check if the offset of every use of a given address
3459// is within the alignment.
3461 Align Alignment) {
3462 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3463 for (auto *User : Addr->users()) {
3464 // If the user is a load or store, then the offset is 0 which is always
3465 // within alignment.
3466 if (isRegImmLoadOrStore(User, Addr))
3467 continue;
3468
3469 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3470 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3471 if (!isInt<12>(CVal) || Alignment <= CVal)
3472 return false;
3473
3474 // Make sure all uses are foldable load/stores.
3475 for (auto *AddUser : User->users())
3476 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3477 return false;
3478
3479 continue;
3480 }
3481
3482 return false;
3483 }
3484
3485 return true;
3486}
3487
3489 SDValue &Offset) {
3490 if (SelectAddrFrameIndex(Addr, Base, Offset))
3491 return true;
3492
3493 SDLoc DL(Addr);
3494 MVT VT = Addr.getSimpleValueType();
3495
3496 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3497 bool CanFold = true;
3498 // Unconditionally fold if operand 1 is not a global address (e.g.
3499 // externsymbol)
3500 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3501 const DataLayout &DL = CurDAG->getDataLayout();
3502 Align Alignment = commonAlignment(
3503 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3504 if (!areOffsetsWithinAlignment(Addr, Alignment))
3505 CanFold = false;
3506 }
3507 if (CanFold) {
3508 Base = Addr.getOperand(0);
3509 Offset = Addr.getOperand(1);
3510 return true;
3511 }
3512 }
3513
3514 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3515 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3516 if (isInt<12>(CVal)) {
3517 Base = Addr.getOperand(0);
3518 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3519 SDValue LoOperand = Base.getOperand(1);
3520 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3521 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3522 // (its low part, really), then we can rely on the alignment of that
3523 // variable to provide a margin of safety before low part can overflow
3524 // the 12 bits of the load/store offset. Check if CVal falls within
3525 // that margin; if so (low part + CVal) can't overflow.
3526 const DataLayout &DL = CurDAG->getDataLayout();
3527 Align Alignment = commonAlignment(
3528 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3529 if ((CVal == 0 || Alignment > CVal) &&
3530 areOffsetsWithinAlignment(Base, Alignment)) {
3531 int64_t CombinedOffset = CVal + GA->getOffset();
3532 Base = Base.getOperand(0);
3533 Offset = CurDAG->getTargetGlobalAddress(
3534 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3535 CombinedOffset, GA->getTargetFlags());
3536 return true;
3537 }
3538 }
3539 }
3540
3541 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3542 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3543 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3544 return true;
3545 }
3546 }
3547
3548 // Handle ADD with large immediates.
3549 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3550 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3551 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3552
3553 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3554 // an ADDI for part of the offset and fold the rest into the load/store.
3555 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
3556 if (CVal >= -4096 && CVal <= 4094) {
3557 int64_t Adj = CVal < 0 ? -2048 : 2047;
3558 Base = SDValue(
3559 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3560 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3561 0);
3562 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3563 return true;
3564 }
3565
3566 // For larger immediates, we might be able to save one instruction from
3567 // constant materialization by folding the Lo12 bits of the immediate into
3568 // the address. We should only do this if the ADD is only used by loads and
3569 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3570 // separately with the full materialized immediate creating extra
3571 // instructions.
3572 if (isWorthFoldingAdd(Addr) &&
3573 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3574 Offset, /*IsPrefetch=*/false)) {
3575 // Insert an ADD instruction with the materialized Hi52 bits.
3576 Base = SDValue(
3577 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3578 0);
3579 return true;
3580 }
3581 }
3582
3583 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3584 /*IsPrefetch=*/false))
3585 return true;
3586
3587 Base = Addr;
3588 Offset = CurDAG->getTargetConstant(0, DL, VT);
3589 return true;
3590}
3591
3592/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3594 SDValue &Offset) {
3595 if (SelectAddrFrameIndex(Addr, Base, Offset))
3596 return true;
3597
3598 SDLoc DL(Addr);
3599 MVT VT = Addr.getSimpleValueType();
3600
3601 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3602 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3603 if (isUInt<9>(CVal)) {
3604 Base = Addr.getOperand(0);
3605
3606 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3607 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3608 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3609 return true;
3610 }
3611 }
3612
3613 Base = Addr;
3614 Offset = CurDAG->getTargetConstant(0, DL, VT);
3615 return true;
3616}
3617
3618/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3619/// Offset should be all zeros.
3621 SDValue &Offset) {
3622 if (SelectAddrFrameIndex(Addr, Base, Offset))
3623 return true;
3624
3625 SDLoc DL(Addr);
3626 MVT VT = Addr.getSimpleValueType();
3627
3628 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3629 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3630 if (isInt<12>(CVal)) {
3631 Base = Addr.getOperand(0);
3632
3633 // Early-out if not a valid offset.
3634 if ((CVal & 0b11111) != 0) {
3635 Base = Addr;
3636 Offset = CurDAG->getTargetConstant(0, DL, VT);
3637 return true;
3638 }
3639
3640 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3641 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3642 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3643 return true;
3644 }
3645 }
3646
3647 // Handle ADD with large immediates.
3648 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3649 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3650 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3651
3652 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3653 // one instruction by folding adjustment (-2048 or 2016) into the address.
3654 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3655 int64_t Adj = CVal < 0 ? -2048 : 2016;
3656 int64_t AdjustedOffset = CVal - Adj;
3657 Base =
3658 SDValue(CurDAG->getMachineNode(
3659 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3660 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3661 0);
3662 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3663 return true;
3664 }
3665
3666 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3667 Offset, /*IsPrefetch=*/true)) {
3668 // Insert an ADD instruction with the materialized Hi52 bits.
3669 Base = SDValue(
3670 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3671 0);
3672 return true;
3673 }
3674 }
3675
3676 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3677 /*IsPrefetch=*/true))
3678 return true;
3679
3680 Base = Addr;
3681 Offset = CurDAG->getTargetConstant(0, DL, VT);
3682 return true;
3683}
3684
3685/// Return true if this a load/store that we have a RegRegScale instruction for.
3687 const RISCVSubtarget &Subtarget) {
3688 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3689 return false;
3690 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3691 if (!(VT.isScalarInteger() &&
3692 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3693 !((VT == MVT::f32 || VT == MVT::f64) &&
3694 Subtarget.hasVendorXTHeadFMemIdx()))
3695 return false;
3696 // Don't allow stores of the value. It must be used as the address.
3697 if (User->getOpcode() == ISD::STORE &&
3698 cast<StoreSDNode>(User)->getValue() == Add)
3699 return false;
3700
3701 return true;
3702}
3703
3704/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3705/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3706/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3707/// single addi and we don't have a SHXADD instruction we could use.
3708/// FIXME: May still need to check how many and what kind of users the SHL has.
3710 SDValue Add,
3711 SDValue Shift = SDValue()) {
3712 bool FoundADDI = false;
3713 for (auto *User : Add->users()) {
3714 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3715 continue;
3716
3717 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3718 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3720 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3721 return false;
3722
3723 FoundADDI = true;
3724
3725 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3726 assert(Shift.getOpcode() == ISD::SHL);
3727 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3728 if (Subtarget.hasShlAdd(ShiftAmt))
3729 return false;
3730
3731 // All users of the ADDI should be load/store.
3732 for (auto *ADDIUser : User->users())
3733 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3734 return false;
3735 }
3736
3737 return true;
3738}
3739
// Match an ADD address as Base + (Index << Scale).
//
// On success, \p Scale is a target constant holding the shift amount (0 when
// no SHL was matched) and the function returns true. Shift amounts greater
// than \p MaxShiftAmount are not matched as a scale. Returns false when
// \p Addr is not an ADD, when the profitability check rejects the fold, or
// when the ADD has a constant RHS that does not fit the reassociation
// pattern handled below.
3741 unsigned MaxShiftAmount,
3742 SDValue &Base, SDValue &Index,
3743 SDValue &Scale) {
3744 if (Addr.getOpcode() != ISD::ADD)
3745 return false;
3746 SDValue LHS = Addr.getOperand(0);
3747 SDValue RHS = Addr.getOperand(1);
3748
3749 EVT VT = Addr.getSimpleValueType();
// Helper: if N is (shl X, C) with constant C <= MaxShiftAmount, set Index to
// X and Shift to a target constant C, and return true. Otherwise return
// false without writing the outputs.
3750 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3751 SDValue &Shift) {
3752 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3753 return false;
3754
3755 // Only match shifts by a value in range [0, MaxShiftAmount].
3756 unsigned ShiftAmt = N.getConstantOperandVal(1);
3757 if (ShiftAmt > MaxShiftAmount)
3758 return false;
3759
3760 Index = N.getOperand(0);
3761 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3762 return true;
3763 };
3764
3765 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3766 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
// Only attempted when C1 fits a signed 12-bit immediate, so that B + C1
// can be emitted as a single ADDI that becomes the new Base.
3767 if (LHS.getOpcode() == ISD::ADD &&
3768 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3769 isInt<12>(C1->getSExtValue())) {
3770 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3771 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3772 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3773 SDLoc(Addr), VT);
3774 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3775 LHS.getOperand(0), C1Val),
3776 0);
3777 return true;
3778 }
3779
3780 // Add is commutative so we need to check both operands.
3781 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3782 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3783 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3784 SDLoc(Addr), VT);
3785 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3786 LHS.getOperand(1), C1Val),
3787 0);
3788 return true;
3789 }
3790 }
3791
3792 // Don't match add with constants.
3793 // FIXME: Is this profitable for large constants that have 0s in the lower
3794 // 12 bits that we can materialize with LUI?
3795 return false;
3796 }
3797
3798 // Try to match a shift on the RHS.
3799 if (SelectShl(RHS, Index, Scale)) {
3800 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3801 return false;
3802 Base = LHS;
3803 return true;
3804 }
3805
3806 // Try to match a shift on the LHS.
3807 if (SelectShl(LHS, Index, Scale)) {
3808 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3809 return false;
3810 Base = RHS;
3811 return true;
3812 }
3813
// No shift on either side: fall back to a plain reg+reg match with scale 0,
// still subject to the profitability check.
3814 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3815 return false;
3816
3817 Base = LHS;
3818 Index = RHS;
3819 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3820 return true;
3821}
3822
// Variant of SelectAddrRegRegScale that additionally requires the matched
// Index to be (and X, mask) where mask is exactly the low \p Bits bits set.
// On success the AND is stripped and \p Index becomes X.
//
// Note that when the base match succeeds but the AND check fails, this
// returns false after \p Base, \p Index and \p Scale have already been
// written by SelectAddrRegRegScale.
3824 unsigned MaxShiftAmount,
3825 unsigned Bits, SDValue &Base,
3826 SDValue &Index,
3827 SDValue &Scale) {
3828 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3829 return false;
3830
3831 if (Index.getOpcode() == ISD::AND) {
3832 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3833 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3834 Index = Index.getOperand(0);
3835 return true;
3836 }
3837 }
3838
3839 return false;
3840}
3841
// Match (add X, Y) where Y is not a constant, returning X in Base and Y in
// \p Offset. Returns false, leaving the outputs untouched, for any other
// address form.
3843 SDValue &Offset) {
3844 if (Addr.getOpcode() != ISD::ADD)
3845 return false;
3846
// A constant second operand is rejected here rather than matched as a
// register offset.
3847 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3848 return false;
3849
3850 Base = Addr.getOperand(0);
3851 Offset = Addr.getOperand(1);
3852 return true;
3853}
3854
// Simplify a shift-amount operand, exploiting the fact that only the low
// log2(ShiftWidth) bits of the amount are read.
//
// \p ShAmt is initialised to N and then progressively simplified: a
// ZERO_EXTEND is peeled, a redundant AND mask is removed, and ADD/SUB of a
// constant that is 0 (or -1 for SUB) modulo ShiftWidth is replaced by the
// other operand, its negation, or its bitwise NOT. Every path returns true;
// when nothing applies, \p ShAmt is simply the (possibly partially
// simplified) input.
3856 SDValue &ShAmt) {
3857 ShAmt = N;
3858
3859 // Peek through zext.
3860 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3861 ShAmt = ShAmt.getOperand(0);
3862
3863 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3864 // amount. If there is an AND on the shift amount, we can bypass it if it
3865 // doesn't affect any of those bits.
3866 if (ShAmt.getOpcode() == ISD::AND &&
3867 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3868 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3869
3870 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3871 // mask that covers the bits needed to represent all shift amounts.
3872 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3873 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3874
3875 if (ShMask.isSubsetOf(AndMask)) {
3876 ShAmt = ShAmt.getOperand(0);
3877 } else {
3878 // SimplifyDemandedBits may have optimized the mask so try restoring any
3879 // bits that are known zero.
3880 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
// The AND really does clear a bit the shift reads: keep it and stop here.
3881 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3882 return true;
3883 ShAmt = ShAmt.getOperand(0);
3884 }
3885 }
3886
3887 if (ShAmt.getOpcode() == ISD::ADD &&
3888 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3889 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3890 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3891 // to avoid the ADD.
3892 if (Imm != 0 && Imm % ShiftWidth == 0) {
3893 ShAmt = ShAmt.getOperand(0);
3894 return true;
3895 }
3896 } else if (ShAmt.getOpcode() == ISD::SUB &&
3897 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3898 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3899 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3900 // generate a NEG instead of a SUB of a constant.
3901 if (Imm != 0 && Imm % ShiftWidth == 0) {
3902 SDLoc DL(ShAmt);
3903 EVT VT = ShAmt.getValueType();
3904 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
// The negation is emitted as (X0 - X); the W form is chosen when VT is i64.
3905 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3906 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3907 ShAmt.getOperand(1));
3908 ShAmt = SDValue(Neg, 0);
3909 return true;
3910 }
3911 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3912 // to generate a NOT instead of a SUB of a constant.
3913 if (Imm % ShiftWidth == ShiftWidth - 1) {
3914 SDLoc DL(ShAmt);
3915 EVT VT = ShAmt.getValueType();
// NOT is emitted as XORI with an all-ones immediate.
3916 MachineSDNode *Not = CurDAG->getMachineNode(
3917 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3918 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3919 ShAmt = SDValue(Not, 0);
3920 return true;
3921 }
3922 }
3923
3924 return true;
3925}
3926
3927 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3928 /// check for equality with 0. This function emits instructions that convert the
3929 /// seteq/setne into something that can be compared with 0.
3930 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3931 /// ISD::SETNE).
///
/// Returns false if \p N is not a SETCC, if its condition code differs from
/// \p ExpectedCCVal, or if the compared operands are not scalar integers.
/// Otherwise sets \p Val to a value that is zero exactly when the two SETCC
/// operands are equal, and returns true.
3933 SDValue &Val) {
3934 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3935 "Unexpected condition code!");
3936
3937 // We're looking for a setcc.
3938 if (N->getOpcode() != ISD::SETCC)
3939 return false;
3940
3941 // Must be an equality comparison.
3942 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3943 if (CCVal != ExpectedCCVal)
3944 return false;
3945
3946 SDValue LHS = N->getOperand(0);
3947 SDValue RHS = N->getOperand(1);
3948
3949 if (!LHS.getValueType().isScalarInteger())
3950 return false;
3951
3952 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
3953 if (isNullConstant(RHS)) {
3954 Val = LHS;
3955 return true;
3956 }
3957
3958 SDLoc DL(N);
3959
3960 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3961 int64_t CVal = C->getSExtValue();
3962 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3963 // non-zero otherwise.
// (The ADDI form below would need +2048 as its immediate, which does not
// fit in a signed 12-bit field.)
3964 if (CVal == -2048) {
3965 Val = SDValue(
3966 CurDAG->getMachineNode(
3967 RISCV::XORI, DL, N->getValueType(0), LHS,
3968 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3969 0);
3970 return true;
3971 }
3972 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3973 // if the LHS is equal to the RHS and non-zero otherwise.
3974 if (isInt<12>(CVal) || CVal == 2048) {
3975 unsigned Opc = RISCV::ADDI;
// When LHS is a sign-extension from i32, ADDIW is used on the
// un-extended operand instead.
3976 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3977 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3978 Opc = RISCV::ADDIW;
3979 LHS = LHS.getOperand(0);
3980 }
3981
3982 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3983 CurDAG->getSignedTargetConstant(
3984 -CVal, DL, N->getValueType(0))),
3985 0);
3986 return true;
3987 }
// With Zbs, a single-bit constant can be cancelled by inverting that bit.
3988 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3989 Val = SDValue(
3990 CurDAG->getMachineNode(
3991 RISCV::BINVI, DL, N->getValueType(0), LHS,
3992 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3993 0);
3994 return true;
3995 }
3996 // Same as the addi case above but for larger immediates (signed 26-bit) use
3997 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3998 // anything which can be done with a single lui as it might be compressible.
3999 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
4000 (CVal & 0xFFF) != 0) {
4001 Val = SDValue(
4002 CurDAG->getMachineNode(
4003 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
4004 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
4005 0);
4006 return true;
4007 }
4008 }
4009
4010 // If nothing else we can XOR the LHS and RHS to produce zero if they are
4011 // equal and a non-zero value if they aren't.
4012 Val = SDValue(
4013 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
4014 return true;
4015}
4016
// Match a value that is known to be sign-extended from its low Bits bits.
//
// Two forms are accepted:
//  1. (sign_extend_inreg X, VT) where VT is Bits wide: Val = X.
//  2. Any N with more than (width - Bits) known sign bits: Val = N, except
//     that a matching (sra (shl X, C), C) pair with C == width - Bits is
//     peeled so Val = X.
// Returns false, leaving Val untouched, otherwise.
4018 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4019 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
4020 Val = N.getOperand(0);
4021 return true;
4022 }
4023
// Helper: returns X for (sra (shl X, ShiftAmt), ShiftAmt) with both shift
// amounts constant and equal to ShiftAmt; returns N unchanged otherwise.
4024 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
4025 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
4026 return N;
4027
4028 SDValue N0 = N.getOperand(0);
4029 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
4030 N.getConstantOperandVal(1) == ShiftAmt &&
4031 N0.getConstantOperandVal(1) == ShiftAmt)
4032 return N0.getOperand(0);
4033
4034 return N;
4035 };
4036
4037 MVT VT = N.getSimpleValueType();
4038 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
4039 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
4040 return true;
4041 }
4042
4043 return false;
4044}
4045
// Match a value that is known to be zero-extended from its low Bits bits.
//
// (and X, mask) with mask equal to the low Bits bits set yields Val = X.
// Otherwise, if all bits from position Bits upward are known zero in N,
// Val = N. Returns false, leaving Val untouched, when neither holds.
4047 if (N.getOpcode() == ISD::AND) {
4048 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4049 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
4050 Val = N.getOperand(0);
4051 return true;
4052 }
4053 }
4054 MVT VT = N.getSimpleValueType();
// Mask covers bit positions [Bits, width).
4055 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
4056 if (CurDAG->MaskedValueIsZero(N, Mask)) {
4057 Val = N;
4058 return true;
4059 }
4060
4061 return false;
4062}
4063
4064 /// Look for various patterns that can be done with a SHL that can be folded
4065 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
4066 /// SHXADD we are trying to match.
///
/// On success \p Val is set to a newly created machine node (a shift, or an
/// SRAI followed by an SRLI) and true is returned. Returns false when no
/// pattern applies.
///
/// NOTE(review): in three places below a condition is followed directly by
/// the body of its `if` and the inner line numbering skips a line; one clause
/// of each condition appears to be absent from this listing -- confirm
/// against the full file.
4068 SDValue &Val) {
4069 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
4070 SDValue N0 = N.getOperand(0);
4071
4072 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
4073 (LeftShift || N0.getOpcode() == ISD::SRL) &&
4075 uint64_t Mask = N.getConstantOperandVal(1);
4076 unsigned C2 = N0.getConstantOperandVal(1);
4077
4078 unsigned XLen = Subtarget->getXLen();
// Drop mask bits that the inner shift already guarantees to be zero.
4079 if (LeftShift)
4080 Mask &= maskTrailingZeros<uint64_t>(C2);
4081 else
4082 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
4083
4084 if (isShiftedMask_64(Mask)) {
4085 unsigned Leading = XLen - llvm::bit_width(Mask);
4086 unsigned Trailing = llvm::countr_zero(Mask);
// The mask's trailing zero count must equal the SHXADD shift amount.
4087 if (Trailing != ShAmt)
4088 return false;
4089
4090 unsigned Opcode;
4091 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
4092 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
4093 // followed by a SHXADD with c3 for the X amount.
4094 if (LeftShift && Leading == 0 && C2 < Trailing)
4095 Opcode = RISCV::SRLI;
4096 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
4097 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
4098 // followed by a SHXADD with c3 for the X amount.
4099 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
4100 Opcode = RISCV::SRLIW;
4101 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
4102 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
4103 // followed by a SHXADD using c3 for the X amount.
4104 else if (!LeftShift && Leading == C2)
4105 Opcode = RISCV::SRLI;
4106 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
4107 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
4108 // followed by a SHXADD using c3 for the X amount.
4109 else if (!LeftShift && Leading == 32 + C2)
4110 Opcode = RISCV::SRLIW;
4111 else
4112 return false;
4113
4114 SDLoc DL(N);
4115 EVT VT = N.getValueType();
// From here on the by-value parameter ShAmt is reused to hold the shift
// amount of the emitted right shift.
4116 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
4117 Val = SDValue(
4118 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
4119 CurDAG->getTargetConstant(ShAmt, DL, VT)),
4120 0);
4121 return true;
4122 }
4123 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
4125 uint64_t Mask = N.getConstantOperandVal(1);
4126 unsigned C2 = N0.getConstantOperandVal(1);
4127
4128 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
4129 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
4130 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
4131 // the X amount.
4132 if (isShiftedMask_64(Mask)) {
4133 unsigned XLen = Subtarget->getXLen();
4134 unsigned Leading = XLen - llvm::bit_width(Mask);
4135 unsigned Trailing = llvm::countr_zero(Mask);
4136 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
4137 SDLoc DL(N);
4138 EVT VT = N.getValueType();
4139 Val = SDValue(CurDAG->getMachineNode(
4140 RISCV::SRAI, DL, VT, N0.getOperand(0),
4141 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
4142 0);
4143 Val = SDValue(CurDAG->getMachineNode(
4144 RISCV::SRLI, DL, VT, Val,
4145 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
4146 0);
4147 return true;
4148 }
4149 }
4150 }
4151 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
4152 (LeftShift || N.getOpcode() == ISD::SRL) &&
4153 isa<ConstantSDNode>(N.getOperand(1))) {
// Shift-of-AND forms: the outer node is the shift and the mask is inside.
4154 SDValue N0 = N.getOperand(0);
4155 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
4157 uint64_t Mask = N0.getConstantOperandVal(1);
4158 if (isShiftedMask_64(Mask)) {
4159 unsigned C1 = N.getConstantOperandVal(1);
4160 unsigned XLen = Subtarget->getXLen();
4161 unsigned Leading = XLen - llvm::bit_width(Mask);
4162 unsigned Trailing = llvm::countr_zero(Mask);
4163 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
4164 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
4165 if (LeftShift && Leading == 32 && Trailing > 0 &&
4166 (Trailing + C1) == ShAmt) {
4167 SDLoc DL(N);
4168 EVT VT = N.getValueType();
4169 Val = SDValue(CurDAG->getMachineNode(
4170 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4171 CurDAG->getTargetConstant(Trailing, DL, VT)),
4172 0);
4173 return true;
4174 }
4175 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
4176 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
4177 if (!LeftShift && Leading == 32 && Trailing > C1 &&
4178 (Trailing - C1) == ShAmt) {
4179 SDLoc DL(N);
4180 EVT VT = N.getValueType();
4181 Val = SDValue(CurDAG->getMachineNode(
4182 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4183 CurDAG->getTargetConstant(Trailing, DL, VT)),
4184 0);
4185 return true;
4186 }
4187 }
4188 }
4189 }
4190
4191 return false;
4192}
4193
4194 /// Look for various patterns that can be done with a SHL that can be folded
4195 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
4196 /// SHXADD_UW we are trying to match.
///
/// Only one pattern is handled here: a single-use (and (shl y, c2), c1) whose
/// inner SHL is also single-use. On success \p Val is set to a new SLLI
/// machine node and true is returned; otherwise returns false.
4198 SDValue &Val) {
4199 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
4200 N.hasOneUse()) {
4201 SDValue N0 = N.getOperand(0);
4202 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
4203 N0.hasOneUse()) {
4204 uint64_t Mask = N.getConstantOperandVal(1);
4205 unsigned C2 = N0.getConstantOperandVal(1);
4206
// Ignore mask bits below C2; the SHL already makes those result bits zero.
4207 Mask &= maskTrailingZeros<uint64_t>(C2);
4208
4209 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
4210 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
4211 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
4212 if (isShiftedMask_64(Mask)) {
4213 unsigned Leading = llvm::countl_zero(Mask);
4214 unsigned Trailing = llvm::countr_zero(Mask);
4215 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
4216 SDLoc DL(N);
4217 EVT VT = N.getValueType();
4218 Val = SDValue(CurDAG->getMachineNode(
4219 RISCV::SLLI, DL, VT, N0.getOperand(0),
4220 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
4221 0);
4222 return true;
4223 }
4224 }
4225 }
4226 }
4227
4228 return false;
4229}
4230
// Return true if the two operands of this OR / OR_VL node have no set bits in
// common. The node's `disjoint` flag is trusted when present; otherwise the
// DAG is queried via haveNoCommonBitsSet.
4232 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
4233 if (N->getFlags().hasDisjoint())
4234 return true;
4235 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
4236}
4237
4238bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4239 SDValue N, SDValue &Val) {
4240 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
4241 /*CompressionCost=*/true);
4242 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
4243 /*CompressionCost=*/true);
4244 if (OrigCost <= Cost)
4245 return false;
4246
4247 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
4248 return true;
4249}
4250
// Try to replace a constant whose bits above bit 31 are zero and whose bit 31
// is set with the same low 32 bits sign-extended (upper 32 bits all ones),
// when that is cheaper to materialize.
//
// Only attempted when every user of the constant is an ADD or an OR whose
// operands are disjoint. Returns true and sets Val only if the substitute
// constant is strictly cheaper according to selectImm64IfCheaper.
4252 if (!isa<ConstantSDNode>(N))
4253 return false;
4254 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
// Requires bit 31 set and every higher bit clear.
4255 if ((Imm >> 31) != 1)
4256 return false;
4257
4258 for (const SDNode *U : N->users()) {
4259 switch (U->getOpcode()) {
4260 case ISD::ADD:
4261 break;
4262 case ISD::OR:
4263 if (orDisjoint(U))
4264 break;
4265 return false;
4266 default:
4267 return false;
4268 }
4269 }
4270
4271 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
4272}
4273
// Try to materialize the negation of a constant instead of the constant
// itself, when that is cheaper.
//
// Rejected for constants that fit in 32 signed bits and for INT64_MIN. Every
// user must be an ADD, or a VMV_V_X_VL splat whose own users are all ADD or
// ADD_VL. Returns true and sets Val only if -Imm is strictly cheaper
// according to selectImm64IfCheaper.
4275 if (!isa<ConstantSDNode>(N))
4276 return false;
4277 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4278 if (isInt<32>(Imm))
4279 return false;
// -INT64_MIN is not representable in int64_t, so the negation below must not
// see it.
4280 if (Imm == INT64_MIN)
4281 return false;
4282
4283 for (const SDNode *U : N->users()) {
4284 switch (U->getOpcode()) {
4285 case ISD::ADD:
4286 break;
4287 case RISCVISD::VMV_V_X_VL:
4288 if (!all_of(U->users(), [](const SDNode *V) {
4289 return V->getOpcode() == ISD::ADD ||
4290 V->getOpcode() == RISCVISD::ADD_VL;
4291 }))
4292 return false;
4293 break;
4294 default:
4295 return false;
4296 }
4297 }
4298
4299 return selectImm64IfCheaper(-Imm, Imm, N, Val);
4300}
4301
// Try to materialize the bitwise inverse of a constant used by logic ops.
//
// Users must be AND/OR/XOR (requires Zbb or Zbkb) or a VMV_V_X_VL splat whose
// users are all AND or AND_VL (requires Zvkb). For constants that fit in 32
// signed bits the inverse is selected unconditionally once the early filter
// passes; for wider constants it is selected only if strictly cheaper.
4303 if (!isa<ConstantSDNode>(N))
4304 return false;
4305 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4306
4307 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
// Low 12 bits all ones means ~Imm has its low 12 bits clear; -1 is excluded.
4308 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
4309 return false;
4310
4311 // Abandon this transform if the constant is needed elsewhere.
4312 for (const SDNode *U : N->users()) {
4313 switch (U->getOpcode()) {
4314 case ISD::AND:
4315 case ISD::OR:
4316 case ISD::XOR:
4317 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
4318 return false;
4319 break;
4320 case RISCVISD::VMV_V_X_VL:
4321 if (!Subtarget->hasStdExtZvkb())
4322 return false;
4323 if (!all_of(U->users(), [](const SDNode *V) {
4324 return V->getOpcode() == ISD::AND ||
4325 V->getOpcode() == RISCVISD::AND_VL;
4326 }))
4327 return false;
4328 break;
4329 default:
4330 return false;
4331 }
4332 }
4333
4334 if (isInt<32>(Imm)) {
4335 Val =
4336 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
4337 return true;
4338 }
4339
4340 // For 64-bit constants, the instruction sequences get complex,
4341 // so we select inverted only if it's cheaper.
4342 return selectImm64IfCheaper(~Imm, Imm, N, Val);
4343}
4344
4345static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4346 unsigned Bits,
4347 const TargetInstrInfo *TII) {
4348 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
4349
4350 if (!MCOpcode)
4351 return false;
4352
4353 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
4354 const uint64_t TSFlags = MCID.TSFlags;
4355 if (!RISCVII::hasSEWOp(TSFlags))
4356 return false;
4357 assert(RISCVII::hasVLOp(TSFlags));
4358
4359 unsigned ChainOpIdx = User->getNumOperands() - 1;
4360 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
4361 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4362 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4363 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4364
4365 if (UserOpNo == VLIdx)
4366 return false;
4367
4368 auto NumDemandedBits =
4369 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4370 return NumDemandedBits && Bits >= *NumDemandedBits;
4371}
4372
4373 // Return true if all users of this SDNode* only consume the lower \p Bits.
4374 // This can be used to form W instructions for add/sub/mul/shl even when the
4375 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4376 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
4377 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4378 // the add/sub/mul/shl to become non-W instructions. By checking the users we
4379 // may be able to use a W instruction and CSE with the other instruction if
4380 // this has happened. We could try to detect that the CSE opportunity exists
4381 // before doing this, but that would be more complicated.
//
// The walk is recursive: for users that merely pass bits through (bitwise
// logic, SHxADD, and some immediate forms) the same question is asked of the
// user at Depth + 1. Any user that has not yet been selected to a machine
// opcode makes the answer false.
//
// NOTE(review): the opening signature line, the condition guarding the first
// `return false`, and the condition in the `default:` case are not present in
// this listing (the inner line numbering skips them) -- confirm against the
// full file.
4383 const unsigned Depth) const {
4384 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4385 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4386 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4387 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4388 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4389 isa<ConstantSDNode>(Node) || Depth != 0) &&
4390 "Unexpected opcode");
4391
4393 return false;
4394
4395 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4396 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4397 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4398 return false;
4399
4400 for (SDUse &Use : Node->uses()) {
4401 SDNode *User = Use.getUser();
4402 // Users of this node should have already been instruction selected
4403 if (!User->isMachineOpcode())
4404 return false;
4405
4406 // TODO: Add more opcodes?
// In each case below, `break` means "this user is fine, check the next one"
// and `return false` means "this user may read bits above Bits".
4407 switch (User->getMachineOpcode()) {
4408 default:
4410 break;
4411 return false;
// Users accepted whenever Bits >= 32.
4412 case RISCV::ADDW:
4413 case RISCV::ADDIW:
4414 case RISCV::SUBW:
4415 case RISCV::MULW:
4416 case RISCV::SLLW:
4417 case RISCV::SLLIW:
4418 case RISCV::SRAW:
4419 case RISCV::SRAIW:
4420 case RISCV::SRLW:
4421 case RISCV::SRLIW:
4422 case RISCV::DIVW:
4423 case RISCV::DIVUW:
4424 case RISCV::REMW:
4425 case RISCV::REMUW:
4426 case RISCV::ROLW:
4427 case RISCV::RORW:
4428 case RISCV::RORIW:
4429 case RISCV::CLSW:
4430 case RISCV::CLZW:
4431 case RISCV::CTZW:
4432 case RISCV::CPOPW:
4433 case RISCV::SLLI_UW:
4434 case RISCV::ABSW:
4435 case RISCV::FMV_W_X:
4436 case RISCV::FCVT_H_W:
4437 case RISCV::FCVT_H_W_INX:
4438 case RISCV::FCVT_H_WU:
4439 case RISCV::FCVT_H_WU_INX:
4440 case RISCV::FCVT_S_W:
4441 case RISCV::FCVT_S_W_INX:
4442 case RISCV::FCVT_S_WU:
4443 case RISCV::FCVT_S_WU_INX:
4444 case RISCV::FCVT_D_W:
4445 case RISCV::FCVT_D_W_INX:
4446 case RISCV::FCVT_D_WU:
4447 case RISCV::FCVT_D_WU_INX:
4448 case RISCV::TH_REVW:
4449 case RISCV::TH_SRRIW:
4450 if (Bits >= 32)
4451 break;
4452 return false;
4453 case RISCV::SLL:
4454 case RISCV::SRA:
4455 case RISCV::SRL:
4456 case RISCV::ROL:
4457 case RISCV::ROR:
4458 case RISCV::BSET:
4459 case RISCV::BCLR:
4460 case RISCV::BINV:
4461 // Shift amount operands only use log2(Xlen) bits.
// Only a use as operand 1 qualifies; any other operand position is rejected.
4462 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4463 break;
4464 return false;
4465 case RISCV::SLLI:
4466 // SLLI only uses the lower (XLen - ShAmt) bits.
4467 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4468 break;
4469 return false;
4470 case RISCV::ANDI:
// If the immediate itself fits in Bits bits the AND discards everything
// above; otherwise fall back to checking ANDI's own users.
4471 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4472 break;
4473 goto RecCheck;
4474 case RISCV::ORI: {
4475 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
// Same idea as ANDI but on the inverted immediate; otherwise fall through
// to the recursive check.
4476 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4477 break;
4478 [[fallthrough]];
4479 }
4480 case RISCV::AND:
4481 case RISCV::OR:
4482 case RISCV::XOR:
4483 case RISCV::XORI:
4484 case RISCV::ANDN:
4485 case RISCV::ORN:
4486 case RISCV::XNOR:
4487 case RISCV::SH1ADD:
4488 case RISCV::SH2ADD:
4489 case RISCV::SH3ADD:
4490 RecCheck:
// Accepted only if the user's own users also read just the low Bits.
4491 if (hasAllNBitUsers(User, Bits, Depth + 1))
4492 break;
4493 return false;
4494 case RISCV::SRLI: {
4495 unsigned ShAmt = User->getConstantOperandVal(1);
4496 // If we are shifting right by less than Bits, and users don't demand any
4497 // bits that were shifted into [Bits-1:0], then we can consider this as an
4498 // N-Bit user.
4499 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4500 break;
4501 return false;
4502 }
4503 case RISCV::SEXT_B:
4504 case RISCV::PACKH:
4505 if (Bits >= 8)
4506 break;
4507 return false;
4508 case RISCV::SEXT_H:
4509 case RISCV::FMV_H_X:
4510 case RISCV::ZEXT_H_RV32:
4511 case RISCV::ZEXT_H_RV64:
4512 case RISCV::PACKW:
4513 if (Bits >= 16)
4514 break;
4515 return false;
4516 case RISCV::PACK:
4517 if (Bits >= (Subtarget->getXLen() / 2))
4518 break;
4519 return false;
4520 case RISCV::PPAIRE_H:
4521 // If only the lower 32-bits of the result are used, then only the
4522 // lower 16 bits of the inputs are used.
4523 if (Bits >= 16 && hasAllNBitUsers(User, 32, Depth + 1))
4524 break;
4525 return false;
4526 case RISCV::ADD_UW:
4527 case RISCV::SH1ADD_UW:
4528 case RISCV::SH2ADD_UW:
4529 case RISCV::SH3ADD_UW:
4530 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4531 // 32 bits.
4532 if (Use.getOperandNo() == 0 && Bits >= 32)
4533 break;
4534 return false;
// Stores: only a use as operand 0 is accepted, with Bits covering the
// store width (8, 16 or 32).
4535 case RISCV::SB:
4536 if (Use.getOperandNo() == 0 && Bits >= 8)
4537 break;
4538 return false;
4539 case RISCV::SH:
4540 if (Use.getOperandNo() == 0 && Bits >= 16)
4541 break;
4542 return false;
4543 case RISCV::SW:
4544 if (Use.getOperandNo() == 0 && Bits >= 32)
4545 break;
4546 return false;
4547 case RISCV::TH_EXT:
4548 case RISCV::TH_EXTU: {
4549 unsigned Msb = User->getConstantOperandVal(1);
4550 unsigned Lsb = User->getConstantOperandVal(2);
4551 // Behavior of Msb < Lsb is not well documented.
4552 if (Msb >= Lsb && Bits > Msb)
4553 break;
4554 return false;
4555 }
4556 }
4557 }
4558
4559 return true;
4560}
4561
4562 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
//
// Tries shift amounts 0..3 in increasing order and takes the first one for
// which the constant is an exact multiple of (1 << Shift) and the quotient
// fits a signed 5-bit immediate. On success Simm5 receives the quotient and
// Shl2 the shift amount, both as target constants of N's value type. Returns
// false for non-constants or when no shift amount works.
4564 SDValue &Shl2) {
4565 auto *C = dyn_cast<ConstantSDNode>(N);
4566 if (!C)
4567 return false;
4568
4569 int64_t Offset = C->getSExtValue();
4570 for (unsigned Shift = 0; Shift < 4; Shift++) {
4571 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4572 EVT VT = N->getValueType(0);
4573 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4574 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4575 return true;
4576 }
4577 }
4578
4579 return false;
4580}
4581
4582 // Select VL as a 5 bit immediate or a value that will become a register. This
4583 // allows us to choose between VSETIVLI or VSETVLI later.
//
// Always returns true. VL is set to one of: a target constant (for constants
// that fit in 5 unsigned bits), the VLMaxSentinel target constant (for an
// all-ones constant or the X0 register), or N itself unchanged.
4585 auto *C = dyn_cast<ConstantSDNode>(N);
4586 if (C && isUInt<5>(C->getZExtValue())) {
4587 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4588 N->getValueType(0));
4589 } else if (C && C->isAllOnes()) {
4590 // Treat all ones as VLMax.
4591 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4592 N->getValueType(0));
4593 } else if (isa<RegisterSDNode>(N) &&
4594 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4595 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4596 // as the register class. Convert X0 to a special immediate to pass the
4597 // MachineVerifier. This is recognized specially by the vsetvli insertion
4598 // pass.
4599 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4600 N->getValueType(0));
4601 } else {
4602 VL = N;
4603 }
4604
4605 return true;
4606}
4607
// Locate a scalar splat node (VMV_V_X_VL or VMV_S_X_VL) whose operand 0 is
// undef, optionally looking through an INSERT_SUBVECTOR into an undef vector.
// Returns the splat node, or an empty SDValue if N does not have that shape.
4609 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
// Only look through the insert when the vector being inserted into is undef.
4610 if (!N.getOperand(0).isUndef())
4611 return SDValue();
4612 N = N.getOperand(1);
4613 }
4614 SDValue Splat = N;
4615 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4616 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4617 !Splat.getOperand(0).isUndef())
4618 return SDValue();
4619 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4620 return Splat;
4621}
4622
// Return the splatted scalar (operand 1 of the splat node) in SplatVal, or
// false if no splat was found.
// NOTE(review): the lines that open this definition and initialise `Splat`
// are not present in this listing -- confirm against the full file.
4625 if (!Splat)
4626 return false;
4627
4628 SplatVal = Splat.getOperand(1);
4629 return true;
4630}
4631
// Shared implementation for the splat-of-immediate selectors.
//
// Requires the splat's scalar operand to be a constant. The constant is
// sign-extended from the vector element width, passed to \p ValidateImm, and
// on acceptance returned in SplatVal as an XLenVT signed target constant.
// When \p Decrement is true, 1 is subtracted after validation, so
// \p ValidateImm always sees the un-decremented value.
// NOTE(review): the opening signature line and the statement that
// initialises `Splat` are not present in this listing -- confirm against the
// full file.
4633 SelectionDAG &DAG,
4634 const RISCVSubtarget &Subtarget,
4635 std::function<bool(int64_t)> ValidateImm,
4636 bool Decrement = false) {
4638 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4639 return false;
4640
4641 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4642 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4643 "Unexpected splat operand type");
4644
4645 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4646 // type is wider than the resulting vector element type: an implicit
4647 // truncation first takes place. Therefore, perform a manual
4648 // truncation/sign-extension in order to ignore any truncated bits and catch
4649 // any zero-extended immediate.
4650 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4651 // sign-extending to (XLenVT -1).
4652 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4653
4654 int64_t SplatImm = SplatConst.getSExtValue();
4655
4656 if (!ValidateImm(SplatImm))
4657 return false;
4658
4659 if (Decrement)
4660 SplatImm -= 1;
4661
4662 SplatVal =
4663 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4664 return true;
4665}
4666
// Match a constant splat whose element value fits a signed 5-bit immediate.
4668 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4669 [](int64_t Imm) { return isInt<5>(Imm); });
4670}
4671
// Match a constant splat whose element value is in [-15, 16]; the value
// returned in SplatVal is the matched value minus one.
4673 return selectVSplatImmHelper(
4674 N, SplatVal, *CurDAG, *Subtarget,
4675 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4676 /*Decrement=*/true);
4677}
4678
// Match a constant splat whose element value is in [-15, 16]; the value is
// returned in SplatVal as-is (no decrement).
4680 return selectVSplatImmHelper(
4681 N, SplatVal, *CurDAG, *Subtarget,
4682 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4683 /*Decrement=*/false);
4684}
4685
// Match a constant splat whose element value is non-zero and in [-15, 16];
// the value returned in SplatVal is the matched value minus one.
4687 SDValue &SplatVal) {
4688 return selectVSplatImmHelper(
4689 N, SplatVal, *CurDAG, *Subtarget,
4690 [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4691 /*Decrement=*/true);
4692}
4693
// Match a constant splat whose element value fits an unsigned immediate of
// `Bits` bits (checked with isUIntN on the sign-extended element value).
4695 SDValue &SplatVal) {
4696 return selectVSplatImmHelper(
4697 N, SplatVal, *CurDAG, *Subtarget,
4698 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4699}
4700
// Succeeds when a splat was found and its scalar operand is accepted by
// selectNegImm, which writes SplatVal on success.
// NOTE(review): the lines that open this definition and initialise `Splat`
// are not present in this listing -- confirm against the full file.
4703 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4704}
4705
// Match a splat after peeling any chain of extend/truncate nodes around it.
// Each peeled node must have a single use and a scalar element width of at
// least 8 bits; otherwise the match fails. The final match is delegated to
// selectVSplat.
4707 auto IsExtOrTrunc = [](SDValue N) {
4708 switch (N->getOpcode()) {
4709 case ISD::SIGN_EXTEND:
4710 case ISD::ZERO_EXTEND:
4711 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4712 // inactive elements will be undef.
4713 case RISCVISD::TRUNCATE_VECTOR_VL:
4714 case RISCVISD::VSEXT_VL:
4715 case RISCVISD::VZEXT_VL:
4716 return true;
4717 default:
4718 return false;
4719 }
4720 };
4721
4722 // We can have multiple nested nodes, so unravel them all if needed.
4723 while (IsExtOrTrunc(N)) {
4724 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4725 return false;
4726 N = N->getOperand(0);
4727 }
4728
4729 return selectVSplat(N, SplatVal);
4730}
4731
// Produce, in Imm, an XLenVT integer value carrying the bit pattern of the
// floating-point value N. Handles bitcasts and FMV moves from an integer
// register (returning the integer source) and FP constants (materialising
// their bit pattern). Returns false for +0.0, for f64 constants on a
// non-64-bit subtarget, and for anything else.
4733 // Allow bitcasts from XLenVT -> FP.
4734 if (N.getOpcode() == ISD::BITCAST &&
4735 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4736 Imm = N.getOperand(0);
4737 return true;
4738 }
4739 // Allow moves from XLenVT to FP.
4740 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4741 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4742 Imm = N.getOperand(0);
4743 return true;
4744 }
4745
4746 // Otherwise, look for FP constants that can be materialized with scalar int.
// NOTE(review): the statement that initialises `CFP` is not present in this
// listing -- confirm against the full file.
4748 if (!CFP)
4749 return false;
4750 const APFloat &APF = CFP->getValueAPF();
4751 // td can handle +0.0 already.
4752 if (APF.isPosZero())
4753 return false;
4754
4755 MVT VT = CFP->getSimpleValueType(0);
4756
4757 MVT XLenVT = Subtarget->getXLenVT();
// An f64 constant is not handled here when the subtarget is not 64-bit; the
// assert records that only -0.0 is expected to reach this point.
4758 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4759 assert(APF.isNegZero() && "Unexpected constant.");
4760 return false;
4761 }
4762 SDLoc DL(N);
4763 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4764 *Subtarget);
4765 return true;
4766}
4767
4769 SDValue &Imm) {
4770 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4771 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4772
4773 if (!isInt<5>(ImmVal))
4774 return false;
4775
4776 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4777 Subtarget->getXLenVT());
4778 return true;
4779 }
4780
4781 return false;
4782}
4783
4784// Match XOR with a VMSET_VL operand. Return the other operand.
4786 if (N.getOpcode() != ISD::XOR)
4787 return false;
4788
4789 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4790 Res = N.getOperand(1);
4791 return true;
4792 }
4793
4794 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4795 Res = N.getOperand(0);
4796 return true;
4797 }
4798
4799 return false;
4800}
4801
4802// Match VMXOR_VL with a VMSET_VL operand, making sure that its VL operand
4803// matches the parent's VL. Return the other operand of the VMXOR_VL.
4805 SDValue &Res) {
4806 if (N.getOpcode() != RISCVISD::VMXOR_VL)
4807 return false;
4808
4809 assert(Parent &&
4810 (Parent->getOpcode() == RISCVISD::VMAND_VL ||
4811 Parent->getOpcode() == RISCVISD::VMOR_VL ||
4812 Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
4813 "Unexpected parent");
4814
4815 // The VL should match the parent.
4816 if (Parent->getOperand(2) != N->getOperand(2))
4817 return false;
4818
4819 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4820 Res = N.getOperand(1);
4821 return true;
4822 }
4823
4824 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4825 Res = N.getOperand(0);
4826 return true;
4827 }
4828
4829 return false;
4830}
4831
4832// Try to remove sext.w if the input is a W instruction or can be made into
4833// a W instruction cheaply.
4834bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4835 // Look for the sext.w pattern, addiw rd, rs1, 0.
4836 if (N->getMachineOpcode() != RISCV::ADDIW ||
4837 !isNullConstant(N->getOperand(1)))
4838 return false;
4839
4840 SDValue N0 = N->getOperand(0);
4841 if (!N0.isMachineOpcode())
4842 return false;
4843
4844 switch (N0.getMachineOpcode()) {
4845 default:
4846 break;
4847 case RISCV::ADD:
4848 case RISCV::ADDI:
4849 case RISCV::SUB:
4850 case RISCV::MUL:
4851 case RISCV::SLLI: {
4852 // Convert sext.w+add/sub/mul to their W instructions. This will create
4853 // a new independent instruction. This improves latency.
4854 unsigned Opc;
4855 switch (N0.getMachineOpcode()) {
4856 default:
4857 llvm_unreachable("Unexpected opcode!");
4858 case RISCV::ADD: Opc = RISCV::ADDW; break;
4859 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4860 case RISCV::SUB: Opc = RISCV::SUBW; break;
4861 case RISCV::MUL: Opc = RISCV::MULW; break;
4862 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4863 }
4864
4865 SDValue N00 = N0.getOperand(0);
4866 SDValue N01 = N0.getOperand(1);
4867
4868 // Shift amount needs to be uimm5.
4869 if (N0.getMachineOpcode() == RISCV::SLLI &&
4870 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4871 break;
4872
4873 SDNode *Result =
4874 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4875 N00, N01);
4876 ReplaceUses(N, Result);
4877 return true;
4878 }
4879 case RISCV::ADDW:
4880 case RISCV::ADDIW:
4881 case RISCV::SUBW:
4882 case RISCV::MULW:
4883 case RISCV::SLLIW:
4884 case RISCV::PACKW:
4885 case RISCV::TH_MULAW:
4886 case RISCV::TH_MULAH:
4887 case RISCV::TH_MULSW:
4888 case RISCV::TH_MULSH:
4889 if (N0.getValueType() == MVT::i32)
4890 break;
4891
4892 // Result is already sign extended just remove the sext.w.
4893 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4894 ReplaceUses(N, N0.getNode());
4895 return true;
4896 }
4897
4898 return false;
4899}
4900
4901static bool usesAllOnesMask(SDValue MaskOp) {
4902 const auto IsVMSet = [](unsigned Opc) {
4903 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4904 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4905 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4906 Opc == RISCV::PseudoVMSET_M_B8;
4907 };
4908
4909 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4910 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4911 // assume that it's all-ones? Same applies to its VL.
4912 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4913}
4914
4915static bool isImplicitDef(SDValue V) {
4916 if (!V.isMachineOpcode())
4917 return false;
4918 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4919 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4920 if (!isImplicitDef(V.getOperand(I)))
4921 return false;
4922 return true;
4923 }
4924 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4925}
4926
4927// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4928// corresponding "unmasked" pseudo versions.
//
// Returns false without touching the DAG when N has no entry in the masked
// pseudo table or when its mask operand is not accepted by usesAllOnesMask.
// Otherwise builds a node with the unmasked opcode, copies N's memory operands
// and flags onto it, replaces the uses of N with it and returns true.
//
// NOTE(review): this listing is missing embedded source lines 4949 and 4956
// (the second operand of the first assert and the declaration of `Ops`);
// restore them from the upstream file before compiling.
4929bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4930 const RISCV::RISCVMaskedPseudoInfo *I =
4931 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4932 if (!I)
4933 return false;
4934
4935 unsigned MaskOpIdx = I->MaskOpIdx;
4936 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4937 return false;
4938
4939 // There are two classes of pseudos in the table - compares and
4940 // everything else. See the comment on RISCVMaskedPseudo for details.
4941 const unsigned Opc = I->UnmaskedPseudo;
4942 const MCInstrDesc &MCID = TII->get(Opc);
4943 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4944
4945 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4946 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4947
// Sanity checks on how the masked and unmasked pseudo descriptions relate.
4948 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4950 "Unmasked pseudo has policy but masked pseudo doesn't?");
4951 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4952 "Unexpected pseudo structure");
4953 assert(!(HasPassthru && !MaskedHasPassthru) &&
4954 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4955
4957 // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have one.
4958 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
// The policy operand is dropped when only the masked pseudo carries one.
4959 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4960 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
// A trailing operand of type MVT::Other is treated as the chain.
4961 bool HasChainOp =
4962 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
// Index of the last operand before the chain (if any); this is the operand
// that is skipped when DropPolicy is set.
4963 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
// NOTE: the loop index `I` shadows the pseudo-info pointer `I` declared at the
// top of the function.
4964 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4965 // Skip the mask
4966 SDValue Op = N->getOperand(I);
4967 if (I == MaskOpIdx)
4968 continue;
4969 if (DropPolicy && I == LastOpNum)
4970 continue;
4971 Ops.push_back(Op);
4972 }
4973
// Same result types as N, but with the unmasked opcode and filtered operands.
4974 MachineSDNode *Result =
4975 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4976
4977 if (!N->memoperands_empty())
4978 CurDAG->setNodeMemRefs(Result, N->memoperands());
4979
4980 Result->setFlags(N->getFlags());
4981 ReplaceUses(N, Result);
4982
4983 return true;
4984}
4985
4986/// If our passthru is an implicit_def, use noreg instead. This side
4987/// steps issues with MachineCSE not being able to CSE expressions with
4988/// IMPLICIT_DEF operands while preserving the semantic intent. See
4989/// pr64282 for context. Note that this transform is the last one
4990/// performed at ISEL DAG to DAG.
///
/// Walks every node of the current DAG from the end of the node list towards
/// the beginning. \returns true if at least one node was rewritten.
///
/// NOTE(review): this listing is missing embedded source lines 5002 and 5006
/// (one operand of the filter condition and the declaration of `Ops`);
/// restore them from the upstream file before compiling.
4991bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4992 bool MadeChange = false;
4993 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4994
4995 while (Position != CurDAG->allnodes_begin()) {
4996 SDNode *N = &*--Position;
// Only machine nodes that still have users are candidates.
4997 if (N->use_empty() || !N->isMachineOpcode())
4998 continue;
4999
5000 const unsigned Opc = N->getMachineOpcode();
// Require an RVV pseudo whose operand 0 satisfies isImplicitDef.
5001 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
5003 !isImplicitDef(N->getOperand(0)))
5004 continue;
5005
// New operand list: NoRegister in place of operand 0, then operands 1..E-1
// copied unchanged.
5007 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
5008 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
5009 SDValue Op = N->getOperand(I);
5010 Ops.push_back(Op);
5011 }
5012
// Recreate the node with the same opcode and result types, carrying over the
// flags and memory operands, then redirect the users of N to it.
5013 MachineSDNode *Result =
5014 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
5015 Result->setFlags(N->getFlags());
5016 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
5017 ReplaceUses(N, Result);
5018 MadeChange = true;
5019 }
5020 return MadeChange;
5021}
5022
5023
5024// This pass converts a legalized DAG into a RISCV-specific DAG, ready
5025// for instruction scheduling.
5027 CodeGenOptLevel OptLevel) {
5028 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
5029}
5030
5032
5037
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
static constexpr Value * getValue(Ty &ValueOrUse)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this is a load/store that we have a RegRegScale instruction for.
static std::pair< SDValue, SDValue > extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair)
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT, SDValue Lo, SDValue Hi)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
static bool isApplicableToPLIOrPLUI(int Val)
#define INST_ALL_NF_CASE(NAME)
cl::opt< uint32_t > PreferredLandingPadLabel("riscv-landing-pad-label", cl::ReallyHidden, cl::desc("Use preferred fixed label for all labels"))
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1540
APInt bitcastToAPInt() const
Definition APFloat.h:1436
bool isPosZero() const
Definition APFloat.h:1555
bool isNegZero() const
Definition APFloat.h:1556
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
LLVM_ABI bool isSplat(unsigned SplatSizeInBits) const
Check if the APInt consists of a repeated bit pattern.
Definition APInt.cpp:631
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:475
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
bool selectVMNOT_VLOp(SDNode *Parent, SDValue N, SDValue &Res)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool tryWideningMulAcc(SDNode *Node, const SDLoc &DL)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
bool selectVMNOTOp(SDValue N, SDValue &Res)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
iterator_range< user_iterator > users()
Definition Value.h:426
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:827
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:861
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:896
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:986
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:263
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
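This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.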
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.