LLVM 23.0.0git
RISCVISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 if (Subtarget->hasStdExtP())
55 break;
56 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
57 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
58 MVT VT = N->getSimpleValueType(0);
59 unsigned Opc =
60 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
61 SDLoc DL(N);
62 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
63 SDValue Src = N->getOperand(0);
64 if (VT.isInteger())
65 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
66 N->getOperand(0));
67 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
68 break;
69 }
70 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
71 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
72 // load. Done after lowering and combining so that we have a chance to
73 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
74 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
75 MVT VT = N->getSimpleValueType(0);
76 SDValue Passthru = N->getOperand(0);
77 SDValue Lo = N->getOperand(1);
78 SDValue Hi = N->getOperand(2);
79 SDValue VL = N->getOperand(3);
80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
82 "Unexpected VTs!");
83 MachineFunction &MF = CurDAG->getMachineFunction();
84 SDLoc DL(N);
85
86 // Create temporary stack for each expanding node.
87 SDValue StackSlot =
88 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
89 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
91
92 SDValue Chain = CurDAG->getEntryNode();
93 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
94
95 SDValue OffsetSlot =
96 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
97 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
98 Align(8));
99
100 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
101
102 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
103 SDValue IntID =
104 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
105 SDValue Ops[] = {Chain,
106 IntID,
107 Passthru,
108 StackSlot,
109 CurDAG->getRegister(RISCV::X0, MVT::i64),
110 VL};
111
112 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
113 MVT::i64, MPI, Align(8),
115 break;
116 }
117 case ISD::FP_EXTEND: {
118 // We only have vector patterns for riscv_fpextend_vl in isel.
119 SDLoc DL(N);
120 MVT VT = N->getSimpleValueType(0);
121 if (!VT.isVector())
122 break;
123 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
124 SDValue TrueMask = CurDAG->getNode(
125 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
126 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
127 TrueMask, VLMAX);
128 break;
129 }
130 }
131
132 if (Result) {
133 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
134 LLVM_DEBUG(N->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\nNew: ");
136 LLVM_DEBUG(Result->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\n");
138
139 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
140 MadeChange = true;
141 }
142 }
143
144 if (MadeChange)
145 CurDAG->RemoveDeadNodes();
146}
147
149 HandleSDNode Dummy(CurDAG->getRoot());
150 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
151
152 bool MadeChange = false;
153 while (Position != CurDAG->allnodes_begin()) {
154 SDNode *N = &*--Position;
155 // Skip dead nodes and any non-machine opcodes.
156 if (N->use_empty() || !N->isMachineOpcode())
157 continue;
158
159 MadeChange |= doPeepholeSExtW(N);
160
161 // FIXME: This is here only because the VMerge transform doesn't
162 // know how to handle masked true inputs. Once that has been moved
163 // to post-ISEL, this can be deleted as well.
164 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 // After we're done with everything else, convert IMPLICIT_DEF
170 // passthru operands to NoRegister. This is required to workaround
171 // an optimization deficiency in MachineCSE. This really should
172 // be merged back into each of the patterns (i.e. there's no good
173 // reason not to go directly to NoReg), but is being done this way
174 // to allow easy backporting.
175 MadeChange |= doPeepholeNoRegPassThru();
176
177 if (MadeChange)
178 CurDAG->RemoveDeadNodes();
179}
180
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
183 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
187 switch (Inst.getOpndKind()) {
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
190 break;
192 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
193 CurDAG->getRegister(RISCV::X0, VT));
194 break;
196 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
197 break;
199 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
// Materialize the 64-bit constant Imm into a register of type VT. Prefers a
// rematerializable PseudoMovImm for 2-instruction sequences (when enabled),
// then a two-register (SLLI + ADD/ADD_UW) decomposition for long sequences,
// and otherwise emits the plain RISCVMatInt instruction sequence.
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  // NOTE(review): the declaration of `Seq` (the RISCVMatInt instruction
  // sequence computed for Imm) appears to have been lost in extraction at
  // this point — confirm against the upstream file before building.

  // Use a rematerializable pseudo instruction for short sequences if enabled.
  if (Seq.size() == 2 && UsePseudoMovImm)
    return SDValue(
        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
                               CurDAG->getSignedTargetConstant(Imm, DL, VT)),
        0);

  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
  // worst an LUI+ADDIW. This will require an extra register, but avoids a
  // constant pool.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bit 31 and 63 are set.
  if (Seq.size() > 3) {
    unsigned ShiftAmt, AddOpc;
    // NOTE(review): the `SeqLo` declaration that receives this call's result
    // appears to have been lost in extraction here.
        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
    // Only profitable if the split sequence (plus SLLI and ADD) is strictly
    // shorter than the straight-line sequence.
    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);

      SDValue SLLI = SDValue(
          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
          0);
      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
    }
  }

  // Otherwise, use the original sequence.
  return selectImmSeq(CurDAG, DL, VT, Seq);
}
244
246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248 bool IsLoad, MVT *IndexVT) {
249 SDValue Chain = Node->getOperand(0);
250
251 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
252
253 if (IsStridedOrIndexed) {
254 Operands.push_back(Node->getOperand(CurOp++)); // Index.
255 if (IndexVT)
256 *IndexVT = Operands.back()->getSimpleValueType(0);
257 }
258
259 if (IsMasked) {
260 SDValue Mask = Node->getOperand(CurOp++);
261 Operands.push_back(Mask);
262 }
263 SDValue VL;
264 selectVLOp(Node->getOperand(CurOp++), VL);
265 Operands.push_back(VL);
266
267 MVT XLenVT = Subtarget->getXLenVT();
268 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
269 Operands.push_back(SEWOp);
270
271 // At the IR layer, all the masked load intrinsics have policy operands,
272 // none of the others do. All have passthru operands. For our pseudos,
273 // all loads have policy operands.
274 if (IsLoad) {
276 if (IsMasked)
277 Policy = Node->getConstantOperandVal(CurOp++);
278 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
279 Operands.push_back(PolicyOp);
280 }
281
282 Operands.push_back(Chain); // Chain.
283}
284
// Select a unit-stride or strided segment-load intrinsic (NF fields) into the
// corresponding VLSEG pseudo instruction.
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  // The intrinsic carries log2(SEW) as its last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  // NOTE(review): a declaration of `LMUL` (used below in getVLSEGPseudo)
  // appears to have been lost in extraction here.

  unsigned CurOp = 2;
  // NOTE(review): the declaration of the `Operands` vector appears to have
  // been lost in extraction here.

  Operands.push_back(Node->getOperand(CurOp++)); // Passthru — TODO confirm
                                                 // against intrinsic layout.

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  // Result 0 is the segment tuple (Untyped), result 1 is the chain.
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  // Preserve the original memory operand on the new machine node.
  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}
312
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
320
321 unsigned CurOp = 2;
323
324 Operands.push_back(Node->getOperand(CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
334 XLenVT, MVT::Other, Operands);
335
336 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
337
338 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
339 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
340 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
341 CurDAG->RemoveDeadNode(Node);
342}
343
// Select an indexed (ordered/unordered) segment-load intrinsic into the
// corresponding VLXSEG pseudo instruction.
void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  // The intrinsic carries log2(SEW) as its last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  // NOTE(review): a declaration of `LMUL` appears to have been lost in
  // extraction here.

  unsigned CurOp = 2;
  // NOTE(review): the declaration of the `Operands` vector appears to have
  // been lost in extraction here.

  Operands.push_back(Node->getOperand(CurOp++)); // Passthru — TODO confirm.

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

#ifndef NDEBUG
  // Sanity check: the index vector must have the same (minimum) element count
  // as the data type implied by SEW and LMUL.
  // Number of element = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  // Second component flags a fractional LMUL (divide instead of multiply).
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  // NOTE(review): the declaration of `IndexLMUL` appears to have been lost in
  // extraction here.
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  // RV32 forbids EEW=64 index values; fail loudly rather than miscompile.
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    reportFatalUsageError("The V extension does not support EEW=64 for index "
                          "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  // Result 0 is the segment tuple (Untyped), result 1 is the chain.
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  // Preserve the original memory operand on the new machine node.
  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}
391
// Select a unit-stride or strided segment-store intrinsic (NF fields) into
// the corresponding VSSEG pseudo instruction.
void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  // For a store, the value type comes from the stored operand (operand 2).
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  // The intrinsic carries log2(SEW) as its last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  // NOTE(review): a declaration of `LMUL` appears to have been lost in
  // extraction here.

  unsigned CurOp = 2;
  // NOTE(review): the declaration of the `Operands` vector appears to have
  // been lost in extraction here.

  Operands.push_back(Node->getOperand(CurOp++)); // Store data.

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  // The store produces only a chain (Node's value type 0).
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  // Preserve the original memory operand on the new machine node.
  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceNode(Node, Store);
}
416
// Select an indexed (ordered/unordered) segment-store intrinsic into the
// corresponding VSXSEG pseudo instruction.
void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  // For a store, the value type comes from the stored operand (operand 2).
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  // The intrinsic carries log2(SEW) as its last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  // NOTE(review): a declaration of `LMUL` appears to have been lost in
  // extraction here.

  unsigned CurOp = 2;
  // NOTE(review): the declaration of the `Operands` vector appears to have
  // been lost in extraction here.

  Operands.push_back(Node->getOperand(CurOp++)); // Store data.

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

#ifndef NDEBUG
  // Sanity check: the index vector must have the same (minimum) element count
  // as the data type implied by SEW and LMUL.
  // Number of element = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  // Second component flags a fractional LMUL (divide instead of multiply).
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  // NOTE(review): the declaration of `IndexLMUL` appears to have been lost in
  // extraction here.
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  // RV32 forbids EEW=64 index values; fail loudly rather than miscompile.
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    reportFatalUsageError("The V extension does not support EEW=64 for index "
                          "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  // The store produces only a chain (Node's value type 0).
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  // Preserve the original memory operand on the new machine node.
  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceNode(Node, Store);
}
462
464 if (!Subtarget->hasVInstructions())
465 return;
466
467 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
468
469 SDLoc DL(Node);
470 MVT XLenVT = Subtarget->getXLenVT();
471
472 unsigned IntNo = Node->getConstantOperandVal(0);
473
474 assert((IntNo == Intrinsic::riscv_vsetvli ||
475 IntNo == Intrinsic::riscv_vsetvlimax) &&
476 "Unexpected vsetvli intrinsic");
477
478 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
479 unsigned Offset = (VLMax ? 1 : 2);
480
481 assert(Node->getNumOperands() == Offset + 2 &&
482 "Unexpected number of operands");
483
484 unsigned SEW =
485 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
486 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
487 Node->getConstantOperandVal(Offset + 1) & 0x7);
488
489 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
490 /*MaskAgnostic*/ true);
491 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
492
493 SDValue VLOperand;
494 unsigned Opcode = RISCV::PseudoVSETVLI;
495 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
496 if (auto VLEN = Subtarget->getRealVLen())
497 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
498 VLMax = true;
499 }
500 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
501 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
502 Opcode = RISCV::PseudoVSETVLIX0;
503 } else {
504 VLOperand = Node->getOperand(1);
505
506 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
507 uint64_t AVL = C->getZExtValue();
508 if (isUInt<5>(AVL)) {
509 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
510 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
511 XLenVT, VLImm, VTypeIOp));
512 return;
513 }
514 }
515 }
516
518 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
519}
520
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
538 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
547
548 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
549 Node->getOperand(1), VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
554 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
555 Node->getOperand(1), Log2SEW, TWiden));
556 }
557}
558
560 MVT VT = Node->getSimpleValueType(0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
568 SDValue N0 = Node->getOperand(0);
569 SDValue N1 = Node->getOperand(1);
570
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 BinOpc, DL, VT, Shift.getOperand(0),
629 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
632 CurDAG->getTargetConstant(ShAmt, DL, VT));
633 ReplaceNode(Node, SLLI);
634 return true;
635}
636
638 unsigned Opc;
639
640 if (Subtarget->hasVendorXTHeadBb())
641 Opc = RISCV::TH_EXT;
642 else if (Subtarget->hasVendorXAndesPerf())
643 Opc = RISCV::NDS_BFOS;
644 else if (Subtarget->hasVendorXqcibm())
645 Opc = RISCV::QC_EXT;
646 else
647 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
648 return false;
649
650 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
651 if (!N1C)
652 return false;
653
654 SDValue N0 = Node->getOperand(0);
655 if (!N0.hasOneUse())
656 return false;
657
658 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
659 const SDLoc &DL, MVT VT) {
660 if (Opc == RISCV::QC_EXT) {
661 // QC.EXT X, width, shamt
662 // shamt is the same as Lsb
663 // width is the number of bits to extract from the Lsb
664 Msb = Msb - Lsb + 1;
665 }
666 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
667 CurDAG->getTargetConstant(Msb, DL, VT),
668 CurDAG->getTargetConstant(Lsb, DL, VT));
669 };
670
671 SDLoc DL(Node);
672 MVT VT = Node->getSimpleValueType(0);
673 const unsigned RightShAmt = N1C->getZExtValue();
674
675 // Transform (sra (shl X, C1) C2) with C1 < C2
676 // -> (SignedBitfieldExtract X, msb, lsb)
677 if (N0.getOpcode() == ISD::SHL) {
678 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
679 if (!N01C)
680 return false;
681
682 const unsigned LeftShAmt = N01C->getZExtValue();
683 // Make sure that this is a bitfield extraction (i.e., the shift-right
684 // amount can not be less than the left-shift).
685 if (LeftShAmt > RightShAmt)
686 return false;
687
688 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
689 const unsigned Msb = MsbPlusOne - 1;
690 const unsigned Lsb = RightShAmt - LeftShAmt;
691
692 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
693 ReplaceNode(Node, Sbe);
694 return true;
695 }
696
697 // Transform (sra (sext_inreg X, _), C) ->
698 // (SignedBitfieldExtract X, msb, lsb)
699 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
700 unsigned ExtSize =
701 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
702
703 // ExtSize of 32 should use sraiw via tablegen pattern.
704 if (ExtSize == 32)
705 return false;
706
707 const unsigned Msb = ExtSize - 1;
708 // If the shift-right amount is greater than Msb, it means that extracts
709 // the X[Msb] bit and sign-extend it.
710 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
711
712 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
713 ReplaceNode(Node, Sbe);
714 return true;
715 }
716
717 return false;
718}
719
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
740 CurDAG->getTargetConstant(Lsb, DL, VT),
741 CurDAG->getTargetConstant(Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(Node, Sbi);
767 return true;
768 }
769
770 return false;
771}
772
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
795 CurDAG->getTargetConstant(Msb, DL, VT),
796 CurDAG->getTargetConstant(Lsb, DL, VT));
797 ReplaceNode(Node, Ube);
798 return true;
799}
800
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
815 CurDAG->getTargetConstant(Lsb, DL, VT),
816 CurDAG->getTargetConstant(Msb, DL, VT));
817 ReplaceNode(Node, Ubi);
818 return true;
819}
820
822 // Target does not support indexed loads.
823 if (!Subtarget->hasVendorXTHeadMemIdx())
824 return false;
825
828 if (AM == ISD::UNINDEXED)
829 return false;
830
832 if (!C)
833 return false;
834
835 EVT LoadVT = Ld->getMemoryVT();
836 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
837 "Unexpected addressing mode");
838 bool IsPre = AM == ISD::PRE_INC;
839 bool IsPost = AM == ISD::POST_INC;
840 int64_t Offset = C->getSExtValue();
841
842 // The constants that can be encoded in the THeadMemIdx instructions
843 // are of the form (sign_extend(imm5) << imm2).
844 unsigned Shift;
845 for (Shift = 0; Shift < 4; Shift++)
846 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
847 break;
848
849 // Constant cannot be encoded.
850 if (Shift == 4)
851 return false;
852
853 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
854 unsigned Opcode;
855 if (LoadVT == MVT::i8 && IsPre)
856 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
857 else if (LoadVT == MVT::i8 && IsPost)
858 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
859 else if (LoadVT == MVT::i16 && IsPre)
860 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
861 else if (LoadVT == MVT::i16 && IsPost)
862 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
863 else if (LoadVT == MVT::i32 && IsPre)
864 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
865 else if (LoadVT == MVT::i32 && IsPost)
866 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
867 else if (LoadVT == MVT::i64 && IsPre)
868 Opcode = RISCV::TH_LDIB;
869 else if (LoadVT == MVT::i64 && IsPost)
870 Opcode = RISCV::TH_LDIA;
871 else
872 return false;
873
874 EVT Ty = Ld->getOffset().getValueType();
875 SDValue Ops[] = {
876 Ld->getBasePtr(),
877 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
878 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
879 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
880 Ld->getValueType(1), MVT::Other, Ops);
881
882 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
883 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
884
885 ReplaceNode(Node, New);
886
887 return true;
888}
889
890static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
891 SDValue Lo, SDValue Hi) {
892 SDValue Ops[] = {
893 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
894 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
895 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
896
897 return SDValue(
898 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops), 0);
899}
900
901// Helper to extract Lo and Hi values from a GPR pair.
902static std::pair<SDValue, SDValue>
904 SDValue Lo =
905 CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, MVT::i32, Pair);
906 SDValue Hi =
907 CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, MVT::i32, Pair);
908 return {Lo, Hi};
909}
910
911// Try to match WMACC pattern: ADDD where one operand pair comes from a
912// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
914 assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");
915
916 SDValue Op0Lo = Node->getOperand(0);
917 SDValue Op0Hi = Node->getOperand(1);
918 SDValue Op1Lo = Node->getOperand(2);
919 SDValue Op1Hi = Node->getOperand(3);
920
921 auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
922 unsigned Opc = Lo.getOpcode();
923 if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
924 Opc != RISCVISD::WMULSU)
925 return false;
926 return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
927 Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
928 };
929
930 SDNode *MulNode = nullptr;
931 SDValue AddLo, AddHi;
932
933 // Check if first operand pair is a supported multiply with single use.
934 if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
935 MulNode = Op0Lo.getNode();
936 AddLo = Op1Lo;
937 AddHi = Op1Hi;
938 }
939 // ADDD is commutative. Check if second operand pair is a supported multiply
940 // with single use.
941 else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
942 MulNode = Op1Lo.getNode();
943 AddLo = Op0Lo;
944 AddHi = Op0Hi;
945 } else {
946 return false;
947 }
948
949 unsigned Opc;
950 switch (MulNode->getOpcode()) {
951 default:
952 llvm_unreachable("Unexpected multiply opcode");
953 case ISD::UMUL_LOHI:
954 Opc = RISCV::WMACCU;
955 break;
956 case ISD::SMUL_LOHI:
957 Opc = RISCV::WMACC;
958 break;
959 case RISCVISD::WMULSU:
960 Opc = RISCV::WMACCSU;
961 break;
962 }
963
964 SDValue Acc = buildGPRPair(CurDAG, DL, MVT::Untyped, AddLo, AddHi);
965
966 // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
967 SDValue M0 = MulNode->getOperand(0);
968 SDValue M1 = MulNode->getOperand(1);
969 MachineSDNode *New =
970 CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Acc, M0, M1);
971
972 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
975 CurDAG->RemoveDeadNode(Node);
976 return true;
977}
978
979static Register getTileReg(uint64_t TileNum) {
980 assert(TileNum <= 15 && "Invalid tile number");
981 return RISCV::T0 + TileNum;
982}
983
  // Custom selection for the SiFive VCIX sf.vc.x.se / sf.vc.i.se intrinsics:
  // choose the LMUL-specific pseudo and rebuild the operand list in the order
  // the pseudo expects.
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");

  SDLoc DL(Node);
  // Operand 0 is the chain; operand 1 carries the intrinsic ID.
  unsigned IntNo = Node->getConstantOperandVal(1);

  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
         "Unexpected vsetvli intrinsic");

  // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
  // The pseudo takes log2(SEW) rather than the raw SEW held in operand 6.
  unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
  SDValue SEWOp =
      CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
  // Pseudo operand order: the three imms, the simm5/scalar, VL (operand 8),
  // log2(SEW), then the chain.
  SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
                                      Node->getOperand(4), Node->getOperand(5),
                                      Node->getOperand(8), SEWOp,
                                      Node->getOperand(0)};

  // Operand 7 holds log2(LMUL) as a signed value: 5/6/7 are the 3-bit
  // sign-extended encodings of -3/-2/-1 (MF8/MF4/MF2); 0..3 select M1..M8.
  // NOTE(review): there is no default case, so an encoding outside
  // {0..3, 5..7} would leave Opcode uninitialized — presumably ruled out by
  // the intrinsic's immediate constraints; confirm against the definition.
  unsigned Opcode;
  auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
  switch (LMulSDNode->getSExtValue()) {
  case 5:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
                                                  : RISCV::PseudoSF_VC_I_SE_MF8;
    break;
  case 6:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
                                                  : RISCV::PseudoSF_VC_I_SE_MF4;
    break;
  case 7:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
                                                  : RISCV::PseudoSF_VC_I_SE_MF2;
    break;
  case 0:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
                                                  : RISCV::PseudoSF_VC_I_SE_M1;
    break;
  case 1:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
                                                  : RISCV::PseudoSF_VC_I_SE_M2;
    break;
  case 2:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
                                                  : RISCV::PseudoSF_VC_I_SE_M4;
    break;
  case 3:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
                                                  : RISCV::PseudoSF_VC_I_SE_M8;
    break;
  }

  // Replace the intrinsic with the selected pseudo, preserving the chain
  // result type.
  ReplaceNode(Node, CurDAG->getMachineNode(
                        Opcode, DL, Node->getSimpleValueType(0), Operands));
}
1042
1043static unsigned getSegInstNF(unsigned Intrinsic) {
1044#define INST_NF_CASE(NAME, NF) \
1045 case Intrinsic::riscv_##NAME##NF: \
1046 return NF;
1047#define INST_NF_CASE_MASK(NAME, NF) \
1048 case Intrinsic::riscv_##NAME##NF##_mask: \
1049 return NF;
1050#define INST_NF_CASE_FF(NAME, NF) \
1051 case Intrinsic::riscv_##NAME##NF##ff: \
1052 return NF;
1053#define INST_NF_CASE_FF_MASK(NAME, NF) \
1054 case Intrinsic::riscv_##NAME##NF##ff_mask: \
1055 return NF;
1056#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
1057 MACRO_NAME(NAME, 2) \
1058 MACRO_NAME(NAME, 3) \
1059 MACRO_NAME(NAME, 4) \
1060 MACRO_NAME(NAME, 5) \
1061 MACRO_NAME(NAME, 6) \
1062 MACRO_NAME(NAME, 7) \
1063 MACRO_NAME(NAME, 8)
1064#define INST_ALL_NF_CASE(NAME) \
1065 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
1066 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
1067#define INST_ALL_NF_CASE_WITH_FF(NAME) \
1068 INST_ALL_NF_CASE(NAME) \
1069 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
1070 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
1071 switch (Intrinsic) {
1072 default:
1073 llvm_unreachable("Unexpected segment load/store intrinsic");
1075 INST_ALL_NF_CASE(vlsseg)
1076 INST_ALL_NF_CASE(vloxseg)
1077 INST_ALL_NF_CASE(vluxseg)
1078 INST_ALL_NF_CASE(vsseg)
1079 INST_ALL_NF_CASE(vssseg)
1080 INST_ALL_NF_CASE(vsoxseg)
1081 INST_ALL_NF_CASE(vsuxseg)
1082 }
1083}
1084
/// Returns true if Val can be materialized by a packed-immediate load:
/// either two identical halfwords whose value fits in a signed 10-bit
/// immediate (packed i16), or four identical bytes (packed i8).
static bool isApplicableToPLI(int Val) {
  const int16_t HiHalf = static_cast<int16_t>(Val >> 16);
  const int16_t LoHalf = static_cast<int16_t>(Val);
  // Both halfwords must hold the same value.
  if (HiHalf != LoHalf)
    return false;
  // Packed i16: the replicated halfword must be a simm10.
  if (HiHalf >= -512 && HiHalf <= 511)
    return true;
  // Packed i8: the two bytes of the halfword must also match.
  const int8_t HiByte = static_cast<int8_t>(LoHalf >> 8);
  const int8_t LoByte = static_cast<int8_t>(Val);
  return HiByte == LoByte;
}
1096
1098 // If we have a custom node, we have already selected.
1099 if (Node->isMachineOpcode()) {
1100 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1101 Node->setNodeId(-1);
1102 return;
1103 }
1104
1105 // Instruction Selection not handled by the auto-generated tablegen selection
1106 // should be handled here.
1107 unsigned Opcode = Node->getOpcode();
1108 MVT XLenVT = Subtarget->getXLenVT();
1109 SDLoc DL(Node);
1110 MVT VT = Node->getSimpleValueType(0);
1111
1112 bool HasBitTest = Subtarget->hasBEXTILike();
1113
1114 switch (Opcode) {
1115 case ISD::Constant: {
1116 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1117 auto *ConstNode = cast<ConstantSDNode>(Node);
1118 if (ConstNode->isZero()) {
1119 SDValue New =
1120 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1121 ReplaceNode(Node, New.getNode());
1122 return;
1123 }
1124 int64_t Imm = ConstNode->getSExtValue();
1125 // If only the lower 8 bits are used, try to convert this to a simm6 by
1126 // sign-extending bit 7. This is neutral without the C extension, and
1127 // allows C.LI to be used if C is present.
1128 if (!isInt<8>(Imm) && isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) &&
1130 Imm = SignExtend64<8>(Imm);
1131 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1132 // by sign extending bit 15.
1133 else if (!isInt<16>(Imm) && isUInt<16>(Imm) &&
1135 Imm = SignExtend64<16>(Imm);
1136 // If the upper 32-bits are not used try to convert this into a simm32 by
1137 // sign extending bit 32.
1138 else if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1139 Imm = SignExtend64<32>(Imm);
1140
1141 if (VT == MVT::i64 && Subtarget->hasStdExtP() && isApplicableToPLI(Imm) &&
1142 hasAllWUsers(Node)) {
1143 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1144 // can simply copy lower 32 bits to higher 32 bits to make it able to
1145 // rematerialize to PLI_B or PLI_H
1146 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1147 }
1148
1149 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1150 return;
1151 }
1152 case ISD::ConstantFP: {
1153 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1154
1155 bool Is64Bit = Subtarget->is64Bit();
1156 bool HasZdinx = Subtarget->hasStdExtZdinx();
1157
1158 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1159 SDValue Imm;
1160 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1161 // create an integer immediate.
1162 if (APF.isPosZero() || NegZeroF64) {
1163 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1164 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1165 else
1166 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1167 } else {
1168 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1169 *Subtarget);
1170 }
1171
1172 unsigned Opc;
1173 switch (VT.SimpleTy) {
1174 default:
1175 llvm_unreachable("Unexpected size");
1176 case MVT::bf16:
1177 assert(Subtarget->hasStdExtZfbfmin());
1178 Opc = RISCV::FMV_H_X;
1179 break;
1180 case MVT::f16:
1181 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1182 break;
1183 case MVT::f32:
1184 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1185 break;
1186 case MVT::f64:
1187 // For RV32, we can't move from a GPR, we need to convert instead. This
1188 // should only happen for +0.0 and -0.0.
1189 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1190 if (HasZdinx)
1191 Opc = RISCV::COPY;
1192 else
1193 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1194 break;
1195 }
1196
1197 SDNode *Res;
1198 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1199 Res =
1200 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1201 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1202 Res =
1203 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1204 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1205 Res = CurDAG->getMachineNode(
1206 Opc, DL, VT, Imm,
1207 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1208 else
1209 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1210
1211 // For f64 -0.0, we need to insert a fneg.d idiom.
1212 if (NegZeroF64) {
1213 Opc = RISCV::FSGNJN_D;
1214 if (HasZdinx)
1215 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1216 Res =
1217 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1218 }
1219
1220 ReplaceNode(Node, Res);
1221 return;
1222 }
1223 case RISCVISD::BuildGPRPair:
1224 case RISCVISD::BuildPairF64: {
1225 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1226 break;
1227
1228 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1229 "BuildPairF64 only handled here on rv32i_zdinx");
1230
1231 SDValue N =
1232 buildGPRPair(CurDAG, DL, VT, Node->getOperand(0), Node->getOperand(1));
1233 ReplaceNode(Node, N.getNode());
1234 return;
1235 }
1236 case RISCVISD::SplitGPRPair:
1237 case RISCVISD::SplitF64: {
1238 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1239 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1240 "SplitF64 only handled here on rv32i_zdinx");
1241
1242 if (!SDValue(Node, 0).use_empty()) {
1243 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1244 Node->getValueType(0),
1245 Node->getOperand(0));
1246 ReplaceUses(SDValue(Node, 0), Lo);
1247 }
1248
1249 if (!SDValue(Node, 1).use_empty()) {
1250 SDValue Hi = CurDAG->getTargetExtractSubreg(
1251 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1252 ReplaceUses(SDValue(Node, 1), Hi);
1253 }
1254
1255 CurDAG->RemoveDeadNode(Node);
1256 return;
1257 }
1258
1259 assert(Opcode != RISCVISD::SplitGPRPair &&
1260 "SplitGPRPair should already be handled");
1261
1262 if (!Subtarget->hasStdExtZfa())
1263 break;
1264 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1265 "Unexpected subtarget");
1266
1267 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1268 if (!SDValue(Node, 0).use_empty()) {
1269 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1270 Node->getOperand(0));
1271 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1272 }
1273 if (!SDValue(Node, 1).use_empty()) {
1274 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1275 Node->getOperand(0));
1276 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1277 }
1278
1279 CurDAG->RemoveDeadNode(Node);
1280 return;
1281 }
1282 case ISD::SHL: {
1283 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1284 if (!N1C)
1285 break;
1286 SDValue N0 = Node->getOperand(0);
1287 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1289 break;
1290 unsigned ShAmt = N1C->getZExtValue();
1291 uint64_t Mask = N0.getConstantOperandVal(1);
1292
1293 if (isShiftedMask_64(Mask)) {
1294 unsigned XLen = Subtarget->getXLen();
1295 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1296 unsigned TrailingZeros = llvm::countr_zero(Mask);
1297 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1298 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1299 // where C2 has 32 leading zeros and C3 trailing zeros.
1300 SDNode *SRLIW = CurDAG->getMachineNode(
1301 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1302 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1303 SDNode *SLLI = CurDAG->getMachineNode(
1304 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1305 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1306 ReplaceNode(Node, SLLI);
1307 return;
1308 }
1309 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1310 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1311 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1312 // where C2 has C4 leading zeros and no trailing zeros.
1313 // This is profitable if the "and" was to be lowered to
1314 // (srli (slli X, C4), C4) and not (andi X, C2).
1315 // For "LeadingZeros == 32":
1316 // - with Zba it's just (slli.uw X, C)
1317 // - without Zba a tablegen pattern applies the very same
1318 // transform as we would have done here
1319 SDNode *SLLI = CurDAG->getMachineNode(
1320 RISCV::SLLI, DL, VT, N0.getOperand(0),
1321 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1322 SDNode *SRLI = CurDAG->getMachineNode(
1323 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1324 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1325 ReplaceNode(Node, SRLI);
1326 return;
1327 }
1328 }
1329 break;
1330 }
1331 case ISD::SRL: {
1332 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1333 if (!N1C)
1334 break;
1335 SDValue N0 = Node->getOperand(0);
1336 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1337 break;
1338 unsigned ShAmt = N1C->getZExtValue();
1339 uint64_t Mask = N0.getConstantOperandVal(1);
1340
1341 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1342 // 32 leading zeros and C3 trailing zeros.
1343 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1344 unsigned XLen = Subtarget->getXLen();
1345 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1346 unsigned TrailingZeros = llvm::countr_zero(Mask);
1347 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1348 SDNode *SRLIW = CurDAG->getMachineNode(
1349 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1350 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1351 SDNode *SLLI = CurDAG->getMachineNode(
1352 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1353 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1354 ReplaceNode(Node, SLLI);
1355 return;
1356 }
1357 }
1358
1359 // Optimize (srl (and X, C2), C) ->
1360 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1361 // Where C2 is a mask with C3 trailing ones.
1362 // Taking into account that the C2 may have had lower bits unset by
1363 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1364 // This pattern occurs when type legalizing right shifts for types with
1365 // less than XLen bits.
1366 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1367 if (!isMask_64(Mask))
1368 break;
1369 unsigned TrailingOnes = llvm::countr_one(Mask);
1370 if (ShAmt >= TrailingOnes)
1371 break;
1372 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1373 if (TrailingOnes == 32) {
1374 SDNode *SRLI = CurDAG->getMachineNode(
1375 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1376 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1377 ReplaceNode(Node, SRLI);
1378 return;
1379 }
1380
1381 // Only do the remaining transforms if the AND has one use.
1382 if (!N0.hasOneUse())
1383 break;
1384
1385 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1386 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1387 SDNode *BEXTI = CurDAG->getMachineNode(
1388 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1389 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1390 ReplaceNode(Node, BEXTI);
1391 return;
1392 }
1393
1394 const unsigned Msb = TrailingOnes - 1;
1395 const unsigned Lsb = ShAmt;
1396 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1397 return;
1398
1399 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1400 SDNode *SLLI =
1401 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1402 CurDAG->getTargetConstant(LShAmt, DL, VT));
1403 SDNode *SRLI = CurDAG->getMachineNode(
1404 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1405 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1406 ReplaceNode(Node, SRLI);
1407 return;
1408 }
1409 case ISD::SRA: {
1411 return;
1412
1414 return;
1415
1416 // Optimize (sra (sext_inreg X, i16), C) ->
1417 // (srai (slli X, (XLen-16), (XLen-16) + C)
1418 // And (sra (sext_inreg X, i8), C) ->
1419 // (srai (slli X, (XLen-8), (XLen-8) + C)
1420 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1421 // This transform matches the code we get without Zbb. The shifts are more
1422 // compressible, and this can help expose CSE opportunities in the sdiv by
1423 // constant optimization.
1424 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1425 if (!N1C)
1426 break;
1427 SDValue N0 = Node->getOperand(0);
1428 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1429 break;
1430 unsigned ShAmt = N1C->getZExtValue();
1431 unsigned ExtSize =
1432 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1433 // ExtSize of 32 should use sraiw via tablegen pattern.
1434 if (ExtSize >= 32 || ShAmt >= ExtSize)
1435 break;
1436 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1437 SDNode *SLLI =
1438 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1439 CurDAG->getTargetConstant(LShAmt, DL, VT));
1440 SDNode *SRAI = CurDAG->getMachineNode(
1441 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1442 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1443 ReplaceNode(Node, SRAI);
1444 return;
1445 }
1447 // Optimize (sext_inreg (srl X, C), i8/i16) ->
1448 // (srai (slli X, XLen-ExtSize-C), XLen-ExtSize)
1449 // This is a bitfield extract pattern where we're extracting a signed
1450 // 8-bit or 16-bit field from position C.
1451 SDValue N0 = Node->getOperand(0);
1452 if (N0.getOpcode() != ISD::SRL || !N0.hasOneUse())
1453 break;
1454
1455 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1456 if (!ShAmtC)
1457 break;
1458
1459 unsigned ExtSize =
1460 cast<VTSDNode>(Node->getOperand(1))->getVT().getSizeInBits();
1461 unsigned ShAmt = ShAmtC->getZExtValue();
1462 unsigned XLen = Subtarget->getXLen();
1463
1464 // Only handle types less than 32, and make sure the shift amount is valid.
1465 if (ExtSize >= 32 || ShAmt >= XLen - ExtSize)
1466 break;
1467
1468 unsigned LShAmt = XLen - ExtSize - ShAmt;
1469 SDNode *SLLI =
1470 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1471 CurDAG->getTargetConstant(LShAmt, DL, VT));
1472 SDNode *SRAI = CurDAG->getMachineNode(
1473 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1474 CurDAG->getTargetConstant(XLen - ExtSize, DL, VT));
1475 ReplaceNode(Node, SRAI);
1476 return;
1477 }
1478 case ISD::OR: {
1480 return;
1481
1482 break;
1483 }
1484 case ISD::XOR:
1486 return;
1487
1488 break;
1489 case ISD::AND: {
1490 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1491 if (!N1C)
1492 break;
1493
1494 SDValue N0 = Node->getOperand(0);
1495
1496 bool LeftShift = N0.getOpcode() == ISD::SHL;
1497 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1498 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1499 if (!C)
1500 break;
1501 unsigned C2 = C->getZExtValue();
1502 unsigned XLen = Subtarget->getXLen();
1503 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1504
1505 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1506 // shift pair might offer more compression opportunities.
1507 // TODO: We could check for C extension here, but we don't have many lit
1508 // tests with the C extension enabled so not checking gets better
1509 // coverage.
1510 // TODO: What if ANDI faster than shift?
1511 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1512
1513 uint64_t C1 = N1C->getZExtValue();
1514
1515 // Clear irrelevant bits in the mask.
1516 if (LeftShift)
1518 else
1519 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1520
1521 // Some transforms should only be done if the shift has a single use or
1522 // the AND would become (srli (slli X, 32), 32)
1523 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1524
1525 SDValue X = N0.getOperand(0);
1526
1527 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1528 // with c3 leading zeros.
1529 if (!LeftShift && isMask_64(C1)) {
1530 unsigned Leading = XLen - llvm::bit_width(C1);
1531 if (C2 < Leading) {
1532 // If the number of leading zeros is C2+32 this can be SRLIW.
1533 if (C2 + 32 == Leading) {
1534 SDNode *SRLIW = CurDAG->getMachineNode(
1535 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1536 ReplaceNode(Node, SRLIW);
1537 return;
1538 }
1539
1540 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1541 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1542 //
1543 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1544 // legalized and goes through DAG combine.
1545 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1546 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1547 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1548 SDNode *SRAIW =
1549 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1550 CurDAG->getTargetConstant(31, DL, VT));
1551 SDNode *SRLIW = CurDAG->getMachineNode(
1552 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1553 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1554 ReplaceNode(Node, SRLIW);
1555 return;
1556 }
1557
1558 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1559 // available.
1560 // Transform (and (srl x, C2), C1)
1561 // -> (<bfextract> x, msb, lsb)
1562 //
1563 // Make sure to keep this below the SRLIW cases, as we always want to
1564 // prefer the more common instruction.
1565 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1566 const unsigned Lsb = C2;
1567 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1568 return;
1569
1570 // (srli (slli x, c3-c2), c3).
1571 // Skip if we could use (zext.w (sraiw X, C2)).
1572 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1573 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1574 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1575 // Also Skip if we can use bexti or th.tst.
1576 Skip |= HasBitTest && Leading == XLen - 1;
1577 if (OneUseOrZExtW && !Skip) {
1578 SDNode *SLLI = CurDAG->getMachineNode(
1579 RISCV::SLLI, DL, VT, X,
1580 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1581 SDNode *SRLI = CurDAG->getMachineNode(
1582 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1583 CurDAG->getTargetConstant(Leading, DL, VT));
1584 ReplaceNode(Node, SRLI);
1585 return;
1586 }
1587 }
1588 }
1589
1590 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1591 // shifted by c2 bits with c3 leading zeros.
1592 if (LeftShift && isShiftedMask_64(C1)) {
1593 unsigned Leading = XLen - llvm::bit_width(C1);
1594
1595 if (C2 + Leading < XLen &&
1596 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1597 // Use slli.uw when possible.
1598 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1599 SDNode *SLLI_UW =
1600 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1601 CurDAG->getTargetConstant(C2, DL, VT));
1602 ReplaceNode(Node, SLLI_UW);
1603 return;
1604 }
1605
1606 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1607 // available.
1608 // Transform (and (shl x, c2), c1)
1609 // -> (<bfinsert> x, msb, lsb)
1610 // e.g.
1611 // (and (shl x, 12), 0x00fff000)
1612 // If XLen = 32 and C2 = 12, then
1613 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1614 const unsigned Msb = XLen - Leading - 1;
1615 const unsigned Lsb = C2;
1616 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1617 return;
1618
1619 if (OneUseOrZExtW && !IsCANDI) {
1620 // (packh x0, X)
1621 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1622 SDNode *PACKH = CurDAG->getMachineNode(
1623 RISCV::PACKH, DL, VT,
1624 CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
1625 ReplaceNode(Node, PACKH);
1626 return;
1627 }
1628 // (srli (slli c2+c3), c3)
1629 SDNode *SLLI = CurDAG->getMachineNode(
1630 RISCV::SLLI, DL, VT, X,
1631 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1632 SDNode *SRLI = CurDAG->getMachineNode(
1633 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1634 CurDAG->getTargetConstant(Leading, DL, VT));
1635 ReplaceNode(Node, SRLI);
1636 return;
1637 }
1638 }
1639 }
1640
1641 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1642 // shifted mask with c2 leading zeros and c3 trailing zeros.
1643 if (!LeftShift && isShiftedMask_64(C1)) {
1644 unsigned Leading = XLen - llvm::bit_width(C1);
1645 unsigned Trailing = llvm::countr_zero(C1);
1646 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1647 !IsCANDI) {
1648 unsigned SrliOpc = RISCV::SRLI;
1649 // If the input is zexti32 we should use SRLIW.
1650 if (X.getOpcode() == ISD::AND &&
1651 isa<ConstantSDNode>(X.getOperand(1)) &&
1652 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1653 SrliOpc = RISCV::SRLIW;
1654 X = X.getOperand(0);
1655 }
1656 SDNode *SRLI = CurDAG->getMachineNode(
1657 SrliOpc, DL, VT, X,
1658 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1659 SDNode *SLLI = CurDAG->getMachineNode(
1660 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1661 CurDAG->getTargetConstant(Trailing, DL, VT));
1662 ReplaceNode(Node, SLLI);
1663 return;
1664 }
1665 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1666 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1667 OneUseOrZExtW && !IsCANDI) {
1668 SDNode *SRLIW = CurDAG->getMachineNode(
1669 RISCV::SRLIW, DL, VT, X,
1670 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1671 SDNode *SLLI = CurDAG->getMachineNode(
1672 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1673 CurDAG->getTargetConstant(Trailing, DL, VT));
1674 ReplaceNode(Node, SLLI);
1675 return;
1676 }
1677 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1678 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1679 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1680 SDNode *SRLI = CurDAG->getMachineNode(
1681 RISCV::SRLI, DL, VT, X,
1682 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1683 SDNode *SLLI_UW = CurDAG->getMachineNode(
1684 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1685 CurDAG->getTargetConstant(Trailing, DL, VT));
1686 ReplaceNode(Node, SLLI_UW);
1687 return;
1688 }
1689 }
1690
1691 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1692 // shifted mask with no leading zeros and c3 trailing zeros.
1693 if (LeftShift && isShiftedMask_64(C1)) {
1694 unsigned Leading = XLen - llvm::bit_width(C1);
1695 unsigned Trailing = llvm::countr_zero(C1);
1696 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1697 SDNode *SRLI = CurDAG->getMachineNode(
1698 RISCV::SRLI, DL, VT, X,
1699 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1700 SDNode *SLLI = CurDAG->getMachineNode(
1701 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1702 CurDAG->getTargetConstant(Trailing, DL, VT));
1703 ReplaceNode(Node, SLLI);
1704 return;
1705 }
1706 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1707 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1708 SDNode *SRLIW = CurDAG->getMachineNode(
1709 RISCV::SRLIW, DL, VT, X,
1710 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1711 SDNode *SLLI = CurDAG->getMachineNode(
1712 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1713 CurDAG->getTargetConstant(Trailing, DL, VT));
1714 ReplaceNode(Node, SLLI);
1715 return;
1716 }
1717
1718 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1719 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1720 Subtarget->hasStdExtZba()) {
1721 SDNode *SRLI = CurDAG->getMachineNode(
1722 RISCV::SRLI, DL, VT, X,
1723 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1724 SDNode *SLLI_UW = CurDAG->getMachineNode(
1725 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1726 CurDAG->getTargetConstant(Trailing, DL, VT));
1727 ReplaceNode(Node, SLLI_UW);
1728 return;
1729 }
1730 }
1731 }
1732
1733 const uint64_t C1 = N1C->getZExtValue();
1734
1735 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1736 N0.hasOneUse()) {
1737 unsigned C2 = N0.getConstantOperandVal(1);
1738 unsigned XLen = Subtarget->getXLen();
1739 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1740
1741 SDValue X = N0.getOperand(0);
1742
1743 // Prefer SRAIW + ANDI when possible.
1744 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1745 X.getOpcode() == ISD::SHL &&
1746 isa<ConstantSDNode>(X.getOperand(1)) &&
1747 X.getConstantOperandVal(1) == 32;
1748 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1749 // mask with c3 leading zeros and c2 is larger than c3.
1750 if (isMask_64(C1) && !Skip) {
1751 unsigned Leading = XLen - llvm::bit_width(C1);
1752 if (C2 > Leading) {
1753 SDNode *SRAI = CurDAG->getMachineNode(
1754 RISCV::SRAI, DL, VT, X,
1755 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1756 SDNode *SRLI = CurDAG->getMachineNode(
1757 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1758 CurDAG->getTargetConstant(Leading, DL, VT));
1759 ReplaceNode(Node, SRLI);
1760 return;
1761 }
1762 }
1763
1764 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1765 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1766 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1767 if (isShiftedMask_64(C1) && !Skip) {
1768 unsigned Leading = XLen - llvm::bit_width(C1);
1769 unsigned Trailing = llvm::countr_zero(C1);
1770 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1771 SDNode *SRAI = CurDAG->getMachineNode(
1772 RISCV::SRAI, DL, VT, N0.getOperand(0),
1773 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1774 SDNode *SRLI = CurDAG->getMachineNode(
1775 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1776 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1777 SDNode *SLLI = CurDAG->getMachineNode(
1778 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1779 CurDAG->getTargetConstant(Trailing, DL, VT));
1780 ReplaceNode(Node, SLLI);
1781 return;
1782 }
1783 }
1784 }
1785
1786 // If C1 masks off the upper bits only (but can't be formed as an
1787 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1788 // available.
1789 // Transform (and x, C1)
1790 // -> (<bfextract> x, msb, lsb)
1791 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1792 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1793 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1794 const unsigned Msb = llvm::bit_width(C1) - 1;
1795 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1796 return;
1797 }
1798
1800 return;
1801
1802 break;
1803 }
1804 case ISD::MUL: {
1805 // Special case for calculating (mul (and X, C2), C1) where the full product
1806 // fits in XLen bits. We can shift X left by the number of leading zeros in
1807 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1808 // product has XLen trailing zeros, putting it in the output of MULHU. This
1809 // can avoid materializing a constant in a register for C2.
1810
1811 // RHS should be a constant.
1812 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1813 if (!N1C || !N1C->hasOneUse())
1814 break;
1815
1816 // LHS should be an AND with constant.
1817 SDValue N0 = Node->getOperand(0);
1818 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1819 break;
1820
1822
1823 // Constant should be a mask.
1824 if (!isMask_64(C2))
1825 break;
1826
1827 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1828 // multiple users or the constant is a simm12. This prevents inserting a
1829 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1830 // make it more costly to materialize. Otherwise, using a SLLI might allow
1831 // it to be compressed.
1832 bool IsANDIOrZExt =
1833 isInt<12>(C2) ||
1834 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1835 // With XTHeadBb, we can use TH.EXTU.
1836 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1837 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1838 break;
1839 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1840 // the constant is a simm32.
1841 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1842 // With XTHeadBb, we can use TH.EXTU.
1843 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1844 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1845 break;
1846
1847 // We need to shift left the AND input and C1 by a total of XLen bits.
1848
1849 // How far left do we need to shift the AND input?
1850 unsigned XLen = Subtarget->getXLen();
1851 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1852
1853 // The constant gets shifted by the remaining amount unless that would
1854 // shift bits out.
1855 uint64_t C1 = N1C->getZExtValue();
1856 unsigned ConstantShift = XLen - LeadingZeros;
1857 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1858 break;
1859
1860 uint64_t ShiftedC1 = C1 << ConstantShift;
1861 // If this RV32, we need to sign extend the constant.
1862 if (XLen == 32)
1863 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1864
1865 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1866 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1867 SDNode *SLLI =
1868 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1869 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1870 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1871 SDValue(SLLI, 0), SDValue(Imm, 0));
1872 ReplaceNode(Node, MULHU);
1873 return;
1874 }
1875 case ISD::SMUL_LOHI:
1876 case ISD::UMUL_LOHI:
1877 case RISCVISD::WMULSU:
1878 case RISCVISD::WADDU:
1879 case RISCVISD::WSUBU: {
1880 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1881 "Unexpected opcode");
1882
1883 unsigned Opc;
1884 switch (Node->getOpcode()) {
1885 default:
1886 llvm_unreachable("Unexpected opcode");
1887 case ISD::SMUL_LOHI:
1888 Opc = RISCV::WMUL;
1889 break;
1890 case ISD::UMUL_LOHI:
1891 Opc = RISCV::WMULU;
1892 break;
1893 case RISCVISD::WMULSU:
1894 Opc = RISCV::WMULSU;
1895 break;
1896 case RISCVISD::WADDU:
1897 Opc = RISCV::WADDU;
1898 break;
1899 case RISCVISD::WSUBU:
1900 Opc = RISCV::WSUBU;
1901 break;
1902 }
1903
1904 SDNode *Result = CurDAG->getMachineNode(
1905 Opc, DL, MVT::Untyped, Node->getOperand(0), Node->getOperand(1));
1906
1907 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(Result, 0));
1908 ReplaceUses(SDValue(Node, 0), Lo);
1909 ReplaceUses(SDValue(Node, 1), Hi);
1910 CurDAG->RemoveDeadNode(Node);
1911 return;
1912 }
1913 case RISCVISD::WSLL:
1914 case RISCVISD::WSLA: {
1915 // Custom select WSLL/WSLA for RV32P.
1916 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1917 "Unexpected opcode");
1918
1919 bool IsSigned = Node->getOpcode() == RISCVISD::WSLA;
1920
1921 SDValue ShAmt = Node->getOperand(1);
1922
1923 unsigned Opc;
1924
1925 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1926 if (ShAmtC && ShAmtC->getZExtValue() < 64) {
1927 Opc = IsSigned ? RISCV::WSLAI : RISCV::WSLLI;
1928 ShAmt = CurDAG->getTargetConstant(ShAmtC->getZExtValue(), DL, XLenVT);
1929 } else {
1930 Opc = IsSigned ? RISCV::WSLA : RISCV::WSLL;
1931 }
1932
1933 SDNode *WShift = CurDAG->getMachineNode(Opc, DL, MVT::Untyped,
1934 Node->getOperand(0), ShAmt);
1935
1936 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(WShift, 0));
1937 ReplaceUses(SDValue(Node, 0), Lo);
1938 ReplaceUses(SDValue(Node, 1), Hi);
1939 CurDAG->RemoveDeadNode(Node);
1940 return;
1941 }
1942 case ISD::LOAD: {
1943 if (tryIndexedLoad(Node))
1944 return;
1945
1946 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1947 // We match post-incrementing load here
1949 if (Load->getAddressingMode() != ISD::POST_INC)
1950 break;
1951
1952 SDValue Chain = Node->getOperand(0);
1953 SDValue Base = Node->getOperand(1);
1954 SDValue Offset = Node->getOperand(2);
1955
1956 bool Simm12 = false;
1957 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1958
1959 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1960 int ConstantVal = ConstantOffset->getSExtValue();
1961 Simm12 = isInt<12>(ConstantVal);
1962 if (Simm12)
1963 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1964 Offset.getValueType());
1965 }
1966
1967 unsigned Opcode = 0;
1968 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1969 case MVT::i8:
1970 if (Simm12 && SignExtend)
1971 Opcode = RISCV::CV_LB_ri_inc;
1972 else if (Simm12 && !SignExtend)
1973 Opcode = RISCV::CV_LBU_ri_inc;
1974 else if (!Simm12 && SignExtend)
1975 Opcode = RISCV::CV_LB_rr_inc;
1976 else
1977 Opcode = RISCV::CV_LBU_rr_inc;
1978 break;
1979 case MVT::i16:
1980 if (Simm12 && SignExtend)
1981 Opcode = RISCV::CV_LH_ri_inc;
1982 else if (Simm12 && !SignExtend)
1983 Opcode = RISCV::CV_LHU_ri_inc;
1984 else if (!Simm12 && SignExtend)
1985 Opcode = RISCV::CV_LH_rr_inc;
1986 else
1987 Opcode = RISCV::CV_LHU_rr_inc;
1988 break;
1989 case MVT::i32:
1990 if (Simm12)
1991 Opcode = RISCV::CV_LW_ri_inc;
1992 else
1993 Opcode = RISCV::CV_LW_rr_inc;
1994 break;
1995 default:
1996 break;
1997 }
1998 if (!Opcode)
1999 break;
2000
2001 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
2002 Chain.getSimpleValueType(), Base,
2003 Offset, Chain));
2004 return;
2005 }
2006 break;
2007 }
2008 case RISCVISD::LD_RV32: {
2009 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
2010
2012 SDValue Chain = Node->getOperand(0);
2013 SDValue Addr = Node->getOperand(1);
2015
2016 SDValue Ops[] = {Base, Offset, Chain};
2017 MachineSDNode *New = CurDAG->getMachineNode(
2018 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
2019 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2020 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2021 ReplaceUses(SDValue(Node, 0), Lo);
2022 ReplaceUses(SDValue(Node, 1), Hi);
2023 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
2024 CurDAG->RemoveDeadNode(Node);
2025 return;
2026 }
2027 case RISCVISD::SD_RV32: {
2029 SDValue Chain = Node->getOperand(0);
2030 SDValue Addr = Node->getOperand(3);
2032
2033 SDValue Lo = Node->getOperand(1);
2034 SDValue Hi = Node->getOperand(2);
2035
2036 SDValue RegPair;
2037 // Peephole to use X0_Pair for storing zero.
2039 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2040 } else {
2041 RegPair = buildGPRPair(CurDAG, DL, MVT::Untyped, Lo, Hi);
2042 }
2043
2044 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
2045 {RegPair, Base, Offset, Chain});
2046 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2047 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
2048 CurDAG->RemoveDeadNode(Node);
2049 return;
2050 }
2051 case RISCVISD::ADDD:
2052 // Try to match WMACC pattern: ADDD where one operand pair comes from a
2053 // widening multiply.
2055 return;
2056
2057 // Fall through to regular ADDD selection.
2058 [[fallthrough]];
2059 case RISCVISD::SUBD:
2060 case RISCVISD::PPAIRE_DB:
2061 case RISCVISD::WADDAU:
2062 case RISCVISD::WSUBAU: {
2063 assert(!Subtarget->is64Bit() && "Unexpected opcode");
2064 assert(
2065 (Node->getOpcode() != RISCVISD::PPAIRE_DB || Subtarget->hasStdExtP()) &&
2066 "Unexpected opcode");
2067
2068 SDValue Op0Lo = Node->getOperand(0);
2069 SDValue Op0Hi = Node->getOperand(1);
2070
2071 SDValue Op0;
2072 if (isNullConstant(Op0Lo) && isNullConstant(Op0Hi)) {
2073 Op0 = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2074 } else {
2075 Op0 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op0Lo, Op0Hi);
2076 }
2077
2078 SDValue Op1Lo = Node->getOperand(2);
2079 SDValue Op1Hi = Node->getOperand(3);
2080
2081 MachineSDNode *New;
2082 if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU) {
2083 // WADDAU/WSUBAU: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi are
2084 // the two 32-bit values.
2085 unsigned Opc = Opcode == RISCVISD::WADDAU ? RISCV::WADDAU : RISCV::WSUBAU;
2086 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1Lo, Op1Hi);
2087 } else {
2088 SDValue Op1 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op1Lo, Op1Hi);
2089
2090 unsigned Opc;
2091 switch (Opcode) {
2092 default:
2093 llvm_unreachable("Unexpected opcode");
2094 case RISCVISD::ADDD:
2095 Opc = RISCV::ADDD;
2096 break;
2097 case RISCVISD::SUBD:
2098 Opc = RISCV::SUBD;
2099 break;
2100 case RISCVISD::PPAIRE_DB:
2101 Opc = RISCV::PPAIRE_DB;
2102 break;
2103 }
2104 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1);
2105 }
2106
2107 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2108 ReplaceUses(SDValue(Node, 0), Lo);
2109 ReplaceUses(SDValue(Node, 1), Hi);
2110 CurDAG->RemoveDeadNode(Node);
2111 return;
2112 }
2114 unsigned IntNo = Node->getConstantOperandVal(0);
2115 switch (IntNo) {
2116 // By default we do not custom select any intrinsic.
2117 default:
2118 break;
2119 case Intrinsic::riscv_vmsgeu:
2120 case Intrinsic::riscv_vmsge: {
2121 SDValue Src1 = Node->getOperand(1);
2122 SDValue Src2 = Node->getOperand(2);
2123 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
2124 bool IsCmpConstant = false;
2125 bool IsCmpMinimum = false;
2126 // Only custom select scalar second operand.
2127 if (Src2.getValueType() != XLenVT)
2128 break;
2129 // Small constants are handled with patterns.
2130 int64_t CVal = 0;
2131 MVT Src1VT = Src1.getSimpleValueType();
2132 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2133 IsCmpConstant = true;
2134 CVal = C->getSExtValue();
2135 if (CVal >= -15 && CVal <= 16) {
2136 if (!IsUnsigned || CVal != 0)
2137 break;
2138 IsCmpMinimum = true;
2139 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2140 Src1VT.getScalarSizeInBits())
2141 .getSExtValue()) {
2142 IsCmpMinimum = true;
2143 }
2144 }
2145 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
2146 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2147 default:
2148 llvm_unreachable("Unexpected LMUL!");
2149#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2150 case RISCVVType::lmulenum: \
2151 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2152 : RISCV::PseudoVMSLT_VX_##suffix; \
2153 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
2154 : RISCV::PseudoVMSGT_VX_##suffix; \
2155 break;
2156 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2157 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2158 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2159 CASE_VMSLT_OPCODES(LMUL_1, M1)
2160 CASE_VMSLT_OPCODES(LMUL_2, M2)
2161 CASE_VMSLT_OPCODES(LMUL_4, M4)
2162 CASE_VMSLT_OPCODES(LMUL_8, M8)
2163#undef CASE_VMSLT_OPCODES
2164 }
2165 // Mask operations use the LMUL from the mask type.
2166 switch (RISCVTargetLowering::getLMUL(VT)) {
2167 default:
2168 llvm_unreachable("Unexpected LMUL!");
2169#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2170 case RISCVVType::lmulenum: \
2171 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2172 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2173 break;
2174 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2175 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2176 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2177 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2178 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2179 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2180 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2181#undef CASE_VMNAND_VMSET_OPCODES
2182 }
2183 SDValue SEW = CurDAG->getTargetConstant(
2184 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2185 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2186 SDValue VL;
2187 selectVLOp(Node->getOperand(3), VL);
2188
2189 // If vmsge(u) with minimum value, expand it to vmset.
2190 if (IsCmpMinimum) {
2192 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
2193 return;
2194 }
2195
2196 if (IsCmpConstant) {
2197 SDValue Imm =
2198 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2199
2200 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2201 {Src1, Imm, VL, SEW}));
2202 return;
2203 }
2204
2205 // Expand to
2206 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2207 SDValue Cmp = SDValue(
2208 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2209 0);
2210 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2211 {Cmp, Cmp, VL, MaskSEW}));
2212 return;
2213 }
2214 case Intrinsic::riscv_vmsgeu_mask:
2215 case Intrinsic::riscv_vmsge_mask: {
2216 SDValue Src1 = Node->getOperand(2);
2217 SDValue Src2 = Node->getOperand(3);
2218 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2219 bool IsCmpConstant = false;
2220 bool IsCmpMinimum = false;
2221 // Only custom select scalar second operand.
2222 if (Src2.getValueType() != XLenVT)
2223 break;
2224 // Small constants are handled with patterns.
2225 MVT Src1VT = Src1.getSimpleValueType();
2226 int64_t CVal = 0;
2227 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2228 IsCmpConstant = true;
2229 CVal = C->getSExtValue();
2230 if (CVal >= -15 && CVal <= 16) {
2231 if (!IsUnsigned || CVal != 0)
2232 break;
2233 IsCmpMinimum = true;
2234 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2235 Src1VT.getScalarSizeInBits())
2236 .getSExtValue()) {
2237 IsCmpMinimum = true;
2238 }
2239 }
2240 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2241 VMOROpcode, VMSGTMaskOpcode;
2242 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2243 default:
2244 llvm_unreachable("Unexpected LMUL!");
2245#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2246 case RISCVVType::lmulenum: \
2247 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2248 : RISCV::PseudoVMSLT_VX_##suffix; \
2249 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2250 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2251 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2252 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2253 break;
2254 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2255 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2256 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2257 CASE_VMSLT_OPCODES(LMUL_1, M1)
2258 CASE_VMSLT_OPCODES(LMUL_2, M2)
2259 CASE_VMSLT_OPCODES(LMUL_4, M4)
2260 CASE_VMSLT_OPCODES(LMUL_8, M8)
2261#undef CASE_VMSLT_OPCODES
2262 }
2263 // Mask operations use the LMUL from the mask type.
2264 switch (RISCVTargetLowering::getLMUL(VT)) {
2265 default:
2266 llvm_unreachable("Unexpected LMUL!");
2267#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2268 case RISCVVType::lmulenum: \
2269 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2270 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2271 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2272 break;
2273 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2274 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2275 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2280#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2281 }
2282 SDValue SEW = CurDAG->getTargetConstant(
2283 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2284 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2285 SDValue VL;
2286 selectVLOp(Node->getOperand(5), VL);
2287 SDValue MaskedOff = Node->getOperand(1);
2288 SDValue Mask = Node->getOperand(4);
2289
2290 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2291 if (IsCmpMinimum) {
2292 // We don't need vmor if the MaskedOff and the Mask are the same
2293 // value.
2294 if (Mask == MaskedOff) {
2295 ReplaceUses(Node, Mask.getNode());
2296 return;
2297 }
2299 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2300 {Mask, MaskedOff, VL, MaskSEW}));
2301 return;
2302 }
2303
2304 // If the MaskedOff value and the Mask are the same value use
2305 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2306 // This avoids needing to copy v0 to vd before starting the next sequence.
2307 if (Mask == MaskedOff) {
2308 SDValue Cmp = SDValue(
2309 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2310 0);
2311 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2312 {Mask, Cmp, VL, MaskSEW}));
2313 return;
2314 }
2315
2316 SDValue PolicyOp =
2317 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2318
2319 if (IsCmpConstant) {
2320 SDValue Imm =
2321 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2322
2323 ReplaceNode(Node, CurDAG->getMachineNode(
2324 VMSGTMaskOpcode, DL, VT,
2325 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2326 return;
2327 }
2328
2329 // Otherwise use
2330 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2331 // The result is mask undisturbed.
2332 // We use the same instructions to emulate mask agnostic behavior, because
2333 // the agnostic result can be either undisturbed or all 1.
2334 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2335 {MaskedOff, Src1, Src2, Mask,
2336 VL, SEW, PolicyOp}),
2337 0);
2338 // vmxor.mm vd, vd, v0 is used to update active value.
2339 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2340 {Cmp, Mask, VL, MaskSEW}));
2341 return;
2342 }
2343 case Intrinsic::riscv_vsetvli:
2344 case Intrinsic::riscv_vsetvlimax:
2345 return selectVSETVLI(Node);
2346 case Intrinsic::riscv_sf_vsettnt:
2347 case Intrinsic::riscv_sf_vsettm:
2348 case Intrinsic::riscv_sf_vsettk:
2349 return selectXSfmmVSET(Node);
2350 }
2351 break;
2352 }
2354 unsigned IntNo = Node->getConstantOperandVal(1);
2355 switch (IntNo) {
2356 // By default we do not custom select any intrinsic.
2357 default:
2358 break;
2359 case Intrinsic::riscv_vlseg2:
2360 case Intrinsic::riscv_vlseg3:
2361 case Intrinsic::riscv_vlseg4:
2362 case Intrinsic::riscv_vlseg5:
2363 case Intrinsic::riscv_vlseg6:
2364 case Intrinsic::riscv_vlseg7:
2365 case Intrinsic::riscv_vlseg8: {
2366 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2367 /*IsStrided*/ false);
2368 return;
2369 }
2370 case Intrinsic::riscv_vlseg2_mask:
2371 case Intrinsic::riscv_vlseg3_mask:
2372 case Intrinsic::riscv_vlseg4_mask:
2373 case Intrinsic::riscv_vlseg5_mask:
2374 case Intrinsic::riscv_vlseg6_mask:
2375 case Intrinsic::riscv_vlseg7_mask:
2376 case Intrinsic::riscv_vlseg8_mask: {
2377 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2378 /*IsStrided*/ false);
2379 return;
2380 }
2381 case Intrinsic::riscv_vlsseg2:
2382 case Intrinsic::riscv_vlsseg3:
2383 case Intrinsic::riscv_vlsseg4:
2384 case Intrinsic::riscv_vlsseg5:
2385 case Intrinsic::riscv_vlsseg6:
2386 case Intrinsic::riscv_vlsseg7:
2387 case Intrinsic::riscv_vlsseg8: {
2388 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2389 /*IsStrided*/ true);
2390 return;
2391 }
2392 case Intrinsic::riscv_vlsseg2_mask:
2393 case Intrinsic::riscv_vlsseg3_mask:
2394 case Intrinsic::riscv_vlsseg4_mask:
2395 case Intrinsic::riscv_vlsseg5_mask:
2396 case Intrinsic::riscv_vlsseg6_mask:
2397 case Intrinsic::riscv_vlsseg7_mask:
2398 case Intrinsic::riscv_vlsseg8_mask: {
2399 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2400 /*IsStrided*/ true);
2401 return;
2402 }
2403 case Intrinsic::riscv_vloxseg2:
2404 case Intrinsic::riscv_vloxseg3:
2405 case Intrinsic::riscv_vloxseg4:
2406 case Intrinsic::riscv_vloxseg5:
2407 case Intrinsic::riscv_vloxseg6:
2408 case Intrinsic::riscv_vloxseg7:
2409 case Intrinsic::riscv_vloxseg8:
2410 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2411 /*IsOrdered*/ true);
2412 return;
2413 case Intrinsic::riscv_vluxseg2:
2414 case Intrinsic::riscv_vluxseg3:
2415 case Intrinsic::riscv_vluxseg4:
2416 case Intrinsic::riscv_vluxseg5:
2417 case Intrinsic::riscv_vluxseg6:
2418 case Intrinsic::riscv_vluxseg7:
2419 case Intrinsic::riscv_vluxseg8:
2420 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2421 /*IsOrdered*/ false);
2422 return;
2423 case Intrinsic::riscv_vloxseg2_mask:
2424 case Intrinsic::riscv_vloxseg3_mask:
2425 case Intrinsic::riscv_vloxseg4_mask:
2426 case Intrinsic::riscv_vloxseg5_mask:
2427 case Intrinsic::riscv_vloxseg6_mask:
2428 case Intrinsic::riscv_vloxseg7_mask:
2429 case Intrinsic::riscv_vloxseg8_mask:
2430 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2431 /*IsOrdered*/ true);
2432 return;
2433 case Intrinsic::riscv_vluxseg2_mask:
2434 case Intrinsic::riscv_vluxseg3_mask:
2435 case Intrinsic::riscv_vluxseg4_mask:
2436 case Intrinsic::riscv_vluxseg5_mask:
2437 case Intrinsic::riscv_vluxseg6_mask:
2438 case Intrinsic::riscv_vluxseg7_mask:
2439 case Intrinsic::riscv_vluxseg8_mask:
2440 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2441 /*IsOrdered*/ false);
2442 return;
2443 case Intrinsic::riscv_vlseg8ff:
2444 case Intrinsic::riscv_vlseg7ff:
2445 case Intrinsic::riscv_vlseg6ff:
2446 case Intrinsic::riscv_vlseg5ff:
2447 case Intrinsic::riscv_vlseg4ff:
2448 case Intrinsic::riscv_vlseg3ff:
2449 case Intrinsic::riscv_vlseg2ff: {
2450 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2451 return;
2452 }
2453 case Intrinsic::riscv_vlseg8ff_mask:
2454 case Intrinsic::riscv_vlseg7ff_mask:
2455 case Intrinsic::riscv_vlseg6ff_mask:
2456 case Intrinsic::riscv_vlseg5ff_mask:
2457 case Intrinsic::riscv_vlseg4ff_mask:
2458 case Intrinsic::riscv_vlseg3ff_mask:
2459 case Intrinsic::riscv_vlseg2ff_mask: {
2460 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2461 return;
2462 }
2463 case Intrinsic::riscv_vloxei:
2464 case Intrinsic::riscv_vloxei_mask:
2465 case Intrinsic::riscv_vluxei:
2466 case Intrinsic::riscv_vluxei_mask: {
2467 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2468 IntNo == Intrinsic::riscv_vluxei_mask;
2469 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2470 IntNo == Intrinsic::riscv_vloxei_mask;
2471
2472 MVT VT = Node->getSimpleValueType(0);
2473 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2474
2475 unsigned CurOp = 2;
2476 SmallVector<SDValue, 8> Operands;
2477 Operands.push_back(Node->getOperand(CurOp++));
2478
2479 MVT IndexVT;
2480 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2481 /*IsStridedOrIndexed*/ true, Operands,
2482 /*IsLoad=*/true, &IndexVT);
2483
2485 "Element count mismatch");
2486
2489 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2490 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2491 reportFatalUsageError("The V extension does not support EEW=64 for "
2492 "index values when XLEN=32");
2493 }
2494 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2495 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2496 static_cast<unsigned>(IndexLMUL));
2497 MachineSDNode *Load =
2498 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2499
2500 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2501
2502 ReplaceNode(Node, Load);
2503 return;
2504 }
2505 case Intrinsic::riscv_vlm:
2506 case Intrinsic::riscv_vle:
2507 case Intrinsic::riscv_vle_mask:
2508 case Intrinsic::riscv_vlse:
2509 case Intrinsic::riscv_vlse_mask: {
2510 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2511 IntNo == Intrinsic::riscv_vlse_mask;
2512 bool IsStrided =
2513 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2514
2515 MVT VT = Node->getSimpleValueType(0);
2516 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2517
2518 // The riscv_vlm intrinsic are always tail agnostic and no passthru
2519 // operand at the IR level. In pseudos, they have both policy and
2520 // passthru operand. The passthru operand is needed to track the
2521 // "tail undefined" state, and the policy is there just for
2522 // for consistency - it will always be "don't care" for the
2523 // unmasked form.
2524 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2525 unsigned CurOp = 2;
2526 SmallVector<SDValue, 8> Operands;
2527 if (HasPassthruOperand)
2528 Operands.push_back(Node->getOperand(CurOp++));
2529 else {
2530 // We eagerly lower to implicit_def (instead of undef), as we
2531 // otherwise fail to select nodes such as: nxv1i1 = undef
2532 SDNode *Passthru =
2533 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2534 Operands.push_back(SDValue(Passthru, 0));
2535 }
2536 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2537 Operands, /*IsLoad=*/true);
2538
2540 const RISCV::VLEPseudo *P =
2541 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2542 static_cast<unsigned>(LMUL));
2543 MachineSDNode *Load =
2544 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2545
2546 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2547
2548 ReplaceNode(Node, Load);
2549 return;
2550 }
2551 case Intrinsic::riscv_vleff:
2552 case Intrinsic::riscv_vleff_mask: {
2553 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2554
2555 MVT VT = Node->getSimpleValueType(0);
2556 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2557
2558 unsigned CurOp = 2;
2559 SmallVector<SDValue, 7> Operands;
2560 Operands.push_back(Node->getOperand(CurOp++));
2561 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2562 /*IsStridedOrIndexed*/ false, Operands,
2563 /*IsLoad=*/true);
2564
2566 const RISCV::VLEPseudo *P =
2567 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2568 Log2SEW, static_cast<unsigned>(LMUL));
2569 MachineSDNode *Load = CurDAG->getMachineNode(
2570 P->Pseudo, DL, Node->getVTList(), Operands);
2571 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2572
2573 ReplaceNode(Node, Load);
2574 return;
2575 }
2576 case Intrinsic::riscv_nds_vln:
2577 case Intrinsic::riscv_nds_vln_mask:
2578 case Intrinsic::riscv_nds_vlnu:
2579 case Intrinsic::riscv_nds_vlnu_mask: {
2580 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2581 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2582 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2583 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2584
2585 MVT VT = Node->getSimpleValueType(0);
2586 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2587 unsigned CurOp = 2;
2588 SmallVector<SDValue, 8> Operands;
2589
2590 Operands.push_back(Node->getOperand(CurOp++));
2591 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2592 /*IsStridedOrIndexed=*/false, Operands,
2593 /*IsLoad=*/true);
2594
2596 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2597 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2598 MachineSDNode *Load =
2599 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2600
2601 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2602 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2603
2604 ReplaceNode(Node, Load);
2605 return;
2606 }
2607 }
2608 break;
2609 }
2610 case ISD::INTRINSIC_VOID: {
2611 unsigned IntNo = Node->getConstantOperandVal(1);
2612 switch (IntNo) {
2613 case Intrinsic::riscv_vsseg2:
2614 case Intrinsic::riscv_vsseg3:
2615 case Intrinsic::riscv_vsseg4:
2616 case Intrinsic::riscv_vsseg5:
2617 case Intrinsic::riscv_vsseg6:
2618 case Intrinsic::riscv_vsseg7:
2619 case Intrinsic::riscv_vsseg8: {
2620 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2621 /*IsStrided*/ false);
2622 return;
2623 }
2624 case Intrinsic::riscv_vsseg2_mask:
2625 case Intrinsic::riscv_vsseg3_mask:
2626 case Intrinsic::riscv_vsseg4_mask:
2627 case Intrinsic::riscv_vsseg5_mask:
2628 case Intrinsic::riscv_vsseg6_mask:
2629 case Intrinsic::riscv_vsseg7_mask:
2630 case Intrinsic::riscv_vsseg8_mask: {
2631 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2632 /*IsStrided*/ false);
2633 return;
2634 }
2635 case Intrinsic::riscv_vssseg2:
2636 case Intrinsic::riscv_vssseg3:
2637 case Intrinsic::riscv_vssseg4:
2638 case Intrinsic::riscv_vssseg5:
2639 case Intrinsic::riscv_vssseg6:
2640 case Intrinsic::riscv_vssseg7:
2641 case Intrinsic::riscv_vssseg8: {
2642 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2643 /*IsStrided*/ true);
2644 return;
2645 }
2646 case Intrinsic::riscv_vssseg2_mask:
2647 case Intrinsic::riscv_vssseg3_mask:
2648 case Intrinsic::riscv_vssseg4_mask:
2649 case Intrinsic::riscv_vssseg5_mask:
2650 case Intrinsic::riscv_vssseg6_mask:
2651 case Intrinsic::riscv_vssseg7_mask:
2652 case Intrinsic::riscv_vssseg8_mask: {
2653 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2654 /*IsStrided*/ true);
2655 return;
2656 }
2657 case Intrinsic::riscv_vsoxseg2:
2658 case Intrinsic::riscv_vsoxseg3:
2659 case Intrinsic::riscv_vsoxseg4:
2660 case Intrinsic::riscv_vsoxseg5:
2661 case Intrinsic::riscv_vsoxseg6:
2662 case Intrinsic::riscv_vsoxseg7:
2663 case Intrinsic::riscv_vsoxseg8:
2664 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2665 /*IsOrdered*/ true);
2666 return;
2667 case Intrinsic::riscv_vsuxseg2:
2668 case Intrinsic::riscv_vsuxseg3:
2669 case Intrinsic::riscv_vsuxseg4:
2670 case Intrinsic::riscv_vsuxseg5:
2671 case Intrinsic::riscv_vsuxseg6:
2672 case Intrinsic::riscv_vsuxseg7:
2673 case Intrinsic::riscv_vsuxseg8:
2674 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2675 /*IsOrdered*/ false);
2676 return;
2677 case Intrinsic::riscv_vsoxseg2_mask:
2678 case Intrinsic::riscv_vsoxseg3_mask:
2679 case Intrinsic::riscv_vsoxseg4_mask:
2680 case Intrinsic::riscv_vsoxseg5_mask:
2681 case Intrinsic::riscv_vsoxseg6_mask:
2682 case Intrinsic::riscv_vsoxseg7_mask:
2683 case Intrinsic::riscv_vsoxseg8_mask:
2684 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2685 /*IsOrdered*/ true);
2686 return;
2687 case Intrinsic::riscv_vsuxseg2_mask:
2688 case Intrinsic::riscv_vsuxseg3_mask:
2689 case Intrinsic::riscv_vsuxseg4_mask:
2690 case Intrinsic::riscv_vsuxseg5_mask:
2691 case Intrinsic::riscv_vsuxseg6_mask:
2692 case Intrinsic::riscv_vsuxseg7_mask:
2693 case Intrinsic::riscv_vsuxseg8_mask:
2694 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2695 /*IsOrdered*/ false);
2696 return;
2697 case Intrinsic::riscv_vsoxei:
2698 case Intrinsic::riscv_vsoxei_mask:
2699 case Intrinsic::riscv_vsuxei:
2700 case Intrinsic::riscv_vsuxei_mask: {
2701 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2702 IntNo == Intrinsic::riscv_vsuxei_mask;
2703 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2704 IntNo == Intrinsic::riscv_vsoxei_mask;
2705
2706 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2707 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2708
2709 unsigned CurOp = 2;
2710 SmallVector<SDValue, 8> Operands;
2711 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2712
2713 MVT IndexVT;
2714 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2715 /*IsStridedOrIndexed*/ true, Operands,
2716 /*IsLoad=*/false, &IndexVT);
2717
2719 "Element count mismatch");
2720
2723 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2724 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2725 reportFatalUsageError("The V extension does not support EEW=64 for "
2726 "index values when XLEN=32");
2727 }
2728 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2729 IsMasked, IsOrdered, IndexLog2EEW,
2730 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2731 MachineSDNode *Store =
2732 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2733
2734 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2735
2736 ReplaceNode(Node, Store);
2737 return;
2738 }
2739 case Intrinsic::riscv_vsm:
2740 case Intrinsic::riscv_vse:
2741 case Intrinsic::riscv_vse_mask:
2742 case Intrinsic::riscv_vsse:
2743 case Intrinsic::riscv_vsse_mask: {
2744 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2745 IntNo == Intrinsic::riscv_vsse_mask;
2746 bool IsStrided =
2747 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2748
2749 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2750 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2751
2752 unsigned CurOp = 2;
2753 SmallVector<SDValue, 8> Operands;
2754 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2755
2756 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2757 Operands);
2758
2760 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2761 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2762 MachineSDNode *Store =
2763 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2764 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2765
2766 ReplaceNode(Node, Store);
2767 return;
2768 }
2769 case Intrinsic::riscv_sf_vc_x_se:
2770 case Intrinsic::riscv_sf_vc_i_se:
2772 return;
2773 case Intrinsic::riscv_sf_vlte8:
2774 case Intrinsic::riscv_sf_vlte16:
2775 case Intrinsic::riscv_sf_vlte32:
2776 case Intrinsic::riscv_sf_vlte64: {
2777 unsigned Log2SEW;
2778 unsigned PseudoInst;
2779 switch (IntNo) {
2780 case Intrinsic::riscv_sf_vlte8:
2781 PseudoInst = RISCV::PseudoSF_VLTE8;
2782 Log2SEW = 3;
2783 break;
2784 case Intrinsic::riscv_sf_vlte16:
2785 PseudoInst = RISCV::PseudoSF_VLTE16;
2786 Log2SEW = 4;
2787 break;
2788 case Intrinsic::riscv_sf_vlte32:
2789 PseudoInst = RISCV::PseudoSF_VLTE32;
2790 Log2SEW = 5;
2791 break;
2792 case Intrinsic::riscv_sf_vlte64:
2793 PseudoInst = RISCV::PseudoSF_VLTE64;
2794 Log2SEW = 6;
2795 break;
2796 }
2797
2798 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2799 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2800 SDValue Operands[] = {Node->getOperand(2),
2801 Node->getOperand(3),
2802 Node->getOperand(4),
2803 SEWOp,
2804 TWidenOp,
2805 Node->getOperand(0)};
2806
2807 MachineSDNode *TileLoad =
2808 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2809 CurDAG->setNodeMemRefs(TileLoad,
2810 {cast<MemSDNode>(Node)->getMemOperand()});
2811
2812 ReplaceNode(Node, TileLoad);
2813 return;
2814 }
2815 case Intrinsic::riscv_sf_mm_s_s:
2816 case Intrinsic::riscv_sf_mm_s_u:
2817 case Intrinsic::riscv_sf_mm_u_s:
2818 case Intrinsic::riscv_sf_mm_u_u:
2819 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2820 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2821 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2822 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2823 case Intrinsic::riscv_sf_mm_f_f: {
2824 bool HasFRM = false;
2825 unsigned PseudoInst;
2826 switch (IntNo) {
2827 case Intrinsic::riscv_sf_mm_s_s:
2828 PseudoInst = RISCV::PseudoSF_MM_S_S;
2829 break;
2830 case Intrinsic::riscv_sf_mm_s_u:
2831 PseudoInst = RISCV::PseudoSF_MM_S_U;
2832 break;
2833 case Intrinsic::riscv_sf_mm_u_s:
2834 PseudoInst = RISCV::PseudoSF_MM_U_S;
2835 break;
2836 case Intrinsic::riscv_sf_mm_u_u:
2837 PseudoInst = RISCV::PseudoSF_MM_U_U;
2838 break;
2839 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2840 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2841 HasFRM = true;
2842 break;
2843 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2844 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2845 HasFRM = true;
2846 break;
2847 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2848 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2849 HasFRM = true;
2850 break;
2851 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2852 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2853 HasFRM = true;
2854 break;
2855 case Intrinsic::riscv_sf_mm_f_f:
2856 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2857 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2858 else
2859 PseudoInst = RISCV::PseudoSF_MM_F_F;
2860 HasFRM = true;
2861 break;
2862 }
2863 uint64_t TileNum = Node->getConstantOperandVal(2);
2864 SDValue Op1 = Node->getOperand(3);
2865 SDValue Op2 = Node->getOperand(4);
2866 MVT VT = Op1->getSimpleValueType(0);
2867 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2868 SDValue TmOp = Node->getOperand(5);
2869 SDValue TnOp = Node->getOperand(6);
2870 SDValue TkOp = Node->getOperand(7);
2871 SDValue TWidenOp = Node->getOperand(8);
2872 SDValue Chain = Node->getOperand(0);
2873
2874 // sf.mm.f.f with sew=32, twiden=2 is invalid
2875 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2876 TWidenOp->getAsZExtVal() == 2)
2877 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2878
2879 SmallVector<SDValue, 10> Operands(
2880 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2881 if (HasFRM)
2882 Operands.push_back(
2883 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2884 Operands.append({TmOp, TnOp, TkOp,
2885 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2886 Chain});
2887
2888 auto *NewNode =
2889 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2890
2891 ReplaceNode(Node, NewNode);
2892 return;
2893 }
2894 case Intrinsic::riscv_sf_vtzero_t: {
2895 uint64_t TileNum = Node->getConstantOperandVal(2);
2896 SDValue Tm = Node->getOperand(3);
2897 SDValue Tn = Node->getOperand(4);
2898 SDValue Log2SEW = Node->getOperand(5);
2899 SDValue TWiden = Node->getOperand(6);
2900 SDValue Chain = Node->getOperand(0);
2901 auto *NewNode = CurDAG->getMachineNode(
2902 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2903 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2904 TWiden, Chain});
2905
2906 ReplaceNode(Node, NewNode);
2907 return;
2908 }
2909 }
2910 break;
2911 }
2912 case ISD::BITCAST: {
2913 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2914 // Just drop bitcasts between vectors if both are fixed or both are
2915 // scalable.
2916 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2917 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2918 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2919 CurDAG->RemoveDeadNode(Node);
2920 return;
2921 }
2922 if (Subtarget->hasStdExtP()) {
2923 bool Is32BitCast =
2924 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2925 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2926 bool Is64BitCast =
2927 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2928 SrcVT == MVT::v2i32)) ||
2929 (SrcVT == MVT::i64 &&
2930 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2931 if (Is32BitCast || Is64BitCast) {
2932 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2933 CurDAG->RemoveDeadNode(Node);
2934 return;
2935 }
2936 }
2937 break;
2938 }
2939 case ISD::SPLAT_VECTOR: {
2940 if (!Subtarget->hasStdExtP())
2941 break;
2942 auto *ConstNode = dyn_cast<ConstantSDNode>(Node->getOperand(0));
2943 if (!ConstNode)
2944 break;
2945
2946 if (ConstNode->isZero()) {
2947 SDValue New =
2948 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
2949 ReplaceNode(Node, New.getNode());
2950 return;
2951 }
2952
2953 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2954 APInt Val = ConstNode->getAPIntValue().trunc(EltSize);
2955
2956 // Find the smallest splat.
2957 if (Val.getBitWidth() > 16 && Val.isSplat(16))
2958 Val = Val.trunc(16);
2959 if (Val.getBitWidth() > 8 && Val.isSplat(8))
2960 Val = Val.trunc(8);
2961
2962 EltSize = Val.getBitWidth();
2963 int64_t Imm = Val.getSExtValue();
2964
2965 unsigned Opc = 0;
2966 if (EltSize == 8) {
2967 Opc = RISCV::PLI_B;
2968 } else if (isInt<10>(Imm)) {
2969 Opc = EltSize == 32 ? RISCV::PLI_W : RISCV::PLI_H;
2970 } else if (EltSize == 16 && isShiftedInt<10, 6>(Imm)) {
2971 Opc = RISCV::PLUI_H;
2972 Imm = Imm >> 6;
2973 } else if (EltSize == 32 && isShiftedInt<10, 22>(Imm)) {
2974 Opc = RISCV::PLUI_W;
2975 Imm = Imm >> 22;
2976 }
2977
2978 if (Opc) {
2979 SDNode *NewNode = CurDAG->getMachineNode(
2980 Opc, DL, VT, CurDAG->getSignedTargetConstant(Imm, DL, XLenVT));
2981 ReplaceNode(Node, NewNode);
2982 return;
2983 }
2984
2985 break;
2986 }
2988 if (Subtarget->hasStdExtP()) {
2989 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2990 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
2991 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
2992 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2993 CurDAG->RemoveDeadNode(Node);
2994 return;
2995 }
2996 }
2997 break;
2999 case RISCVISD::TUPLE_INSERT: {
3000 SDValue V = Node->getOperand(0);
3001 SDValue SubV = Node->getOperand(1);
3002 SDLoc DL(SubV);
3003 auto Idx = Node->getConstantOperandVal(2);
3004 MVT SubVecVT = SubV.getSimpleValueType();
3005
3006 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3007 MVT SubVecContainerVT = SubVecVT;
3008 // Establish the correct scalable-vector types for any fixed-length type.
3009 if (SubVecVT.isFixedLengthVector()) {
3010 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
3012 [[maybe_unused]] bool ExactlyVecRegSized =
3013 Subtarget->expandVScale(SubVecVT.getSizeInBits())
3014 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
3015 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
3016 .getKnownMinValue()));
3017 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
3018 }
3019 MVT ContainerVT = VT;
3020 if (VT.isFixedLengthVector())
3021 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
3022
3023 const auto *TRI = Subtarget->getRegisterInfo();
3024 unsigned SubRegIdx;
3025 std::tie(SubRegIdx, Idx) =
3027 ContainerVT, SubVecContainerVT, Idx, TRI);
3028
3029 // If the Idx hasn't been completely eliminated then this is a subvector
3030 // insert which doesn't naturally align to a vector register. These must
3031 // be handled using instructions to manipulate the vector registers.
3032 if (Idx != 0)
3033 break;
3034
3035 RISCVVType::VLMUL SubVecLMUL =
3036 RISCVTargetLowering::getLMUL(SubVecContainerVT);
3037 [[maybe_unused]] bool IsSubVecPartReg =
3038 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
3039 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
3040 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
3041 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
3042 V.isUndef()) &&
3043 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
3044 "the subvector is smaller than a full-sized register");
3045
3046 // If we haven't set a SubRegIdx, then we must be going between
3047 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
3048 if (SubRegIdx == RISCV::NoSubRegister) {
3049 unsigned InRegClassID =
3052 InRegClassID &&
3053 "Unexpected subvector extraction");
3054 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3055 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3056 DL, VT, SubV, RC);
3057 ReplaceNode(Node, NewNode);
3058 return;
3059 }
3060
3061 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
3062 ReplaceNode(Node, Insert.getNode());
3063 return;
3064 }
3066 case RISCVISD::TUPLE_EXTRACT: {
3067 SDValue V = Node->getOperand(0);
3068 auto Idx = Node->getConstantOperandVal(1);
3069 MVT InVT = V.getSimpleValueType();
3070 SDLoc DL(V);
3071
3072 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3073 MVT SubVecContainerVT = VT;
3074 // Establish the correct scalable-vector types for any fixed-length type.
3075 if (VT.isFixedLengthVector()) {
3076 assert(Idx == 0);
3077 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
3078 }
3079 if (InVT.isFixedLengthVector())
3080 InVT = TLI.getContainerForFixedLengthVector(InVT);
3081
3082 const auto *TRI = Subtarget->getRegisterInfo();
3083 unsigned SubRegIdx;
3084 std::tie(SubRegIdx, Idx) =
3086 InVT, SubVecContainerVT, Idx, TRI);
3087
3088 // If the Idx hasn't been completely eliminated then this is a subvector
3089 // extract which doesn't naturally align to a vector register. These must
3090 // be handled using instructions to manipulate the vector registers.
3091 if (Idx != 0)
3092 break;
3093
3094 // If we haven't set a SubRegIdx, then we must be going between
3095 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
3096 if (SubRegIdx == RISCV::NoSubRegister) {
3097 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
3099 InRegClassID &&
3100 "Unexpected subvector extraction");
3101 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3102 SDNode *NewNode =
3103 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3104 ReplaceNode(Node, NewNode);
3105 return;
3106 }
3107
3108 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3109 ReplaceNode(Node, Extract.getNode());
3110 return;
3111 }
3112 case RISCVISD::VMV_S_X_VL:
3113 case RISCVISD::VFMV_S_F_VL:
3114 case RISCVISD::VMV_V_X_VL:
3115 case RISCVISD::VFMV_V_F_VL: {
3116 // Try to match splat of a scalar load to a strided load with stride of x0.
3117 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3118 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3119 if (!Node->getOperand(0).isUndef())
3120 break;
3121 SDValue Src = Node->getOperand(1);
3122 auto *Ld = dyn_cast<LoadSDNode>(Src);
3123 // Can't fold load update node because the second
3124 // output is used so that load update node can't be removed.
3125 if (!Ld || Ld->isIndexed())
3126 break;
3127 EVT MemVT = Ld->getMemoryVT();
3128 // The memory VT should be the same size as the element type.
3129 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3130 break;
3131 if (!IsProfitableToFold(Src, Node, Node) ||
3132 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
3133 break;
3134
3135 SDValue VL;
3136 if (IsScalarMove) {
3137 // We could deal with more VL if we update the VSETVLI insert pass to
3138 // avoid introducing more VSETVLI.
3139 if (!isOneConstant(Node->getOperand(2)))
3140 break;
3141 selectVLOp(Node->getOperand(2), VL);
3142 } else
3143 selectVLOp(Node->getOperand(2), VL);
3144
3145 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
3146 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
3147
3148 // If VL=1, then we don't need to do a strided load and can just do a
3149 // regular load.
3150 bool IsStrided = !isOneConstant(VL);
3151
3152 // Only do a strided load if we have optimized zero-stride vector load.
3153 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3154 break;
3155
3156 SmallVector<SDValue> Operands = {
3157 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
3158 Ld->getBasePtr()};
3159 if (IsStrided)
3160 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
3162 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
3163 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
3164
3166 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3167 /*IsMasked*/ false, IsStrided, /*FF*/ false,
3168 Log2SEW, static_cast<unsigned>(LMUL));
3169 MachineSDNode *Load =
3170 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
3171 // Update the chain.
3172 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
3173 // Record the mem-refs
3174 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
3175 // Replace the splat with the vlse.
3176 ReplaceNode(Node, Load);
3177 return;
3178 }
3179 case ISD::PREFETCH:
3180 // MIPS's prefetch instruction already encodes the hint within the
3181 // instruction itself, so no extra NTL hint is needed.
3182 if (Subtarget->hasVendorXMIPSCBOP())
3183 break;
3184
3185 unsigned Locality = Node->getConstantOperandVal(3);
3186 if (Locality > 2)
3187 break;
3188
3189 auto *LoadStoreMem = cast<MemSDNode>(Node);
3190 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3192
3193 int NontemporalLevel = 0;
3194 switch (Locality) {
3195 case 0:
3196 NontemporalLevel = 3; // NTL.ALL
3197 break;
3198 case 1:
3199 NontemporalLevel = 1; // NTL.PALL
3200 break;
3201 case 2:
3202 NontemporalLevel = 0; // NTL.P1
3203 break;
3204 default:
3205 llvm_unreachable("unexpected locality value.");
3206 }
3207
3208 if (NontemporalLevel & 0b1)
3210 if (NontemporalLevel & 0b10)
3212 break;
3213 }
3214
3215 // Select the default instruction.
3216 SelectCode(Node);
3217}
3218
3220 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3221 std::vector<SDValue> &OutOps) {
3222 // Always produce a register and immediate operand, as expected by
3223 // RISCVAsmPrinter::PrintAsmMemoryOperand.
3224 switch (ConstraintID) {
3227 SDValue Op0, Op1;
3228 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
3229 assert(Found && "SelectAddrRegImm should always succeed");
3230 OutOps.push_back(Op0);
3231 OutOps.push_back(Op1);
3232 return false;
3233 }
3235 OutOps.push_back(Op);
3236 OutOps.push_back(
3237 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
3238 return false;
3239 default:
3240 report_fatal_error("Unexpected asm memory constraint " +
3241 InlineAsm::getMemConstraintName(ConstraintID));
3242 }
3243
3244 return true;
3245}
3246
3248 SDValue &Offset) {
3249 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3250 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
3251 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
3252 return true;
3253 }
3254
3255 return false;
3256}
3257
3258// Fold constant addresses.
3259static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3260 const MVT VT, const RISCVSubtarget *Subtarget,
3262 bool IsPrefetch = false) {
3263 if (!isa<ConstantSDNode>(Addr))
3264 return false;
3265
3266 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3267
3268 // If the constant is a simm12, we can fold the whole constant and use X0 as
3269 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3270 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3271 int64_t Lo12 = SignExtend64<12>(CVal);
3272 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3273 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3274 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3275 return false;
3276 if (Hi) {
3277 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3278 Base = SDValue(
3279 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3280 CurDAG->getTargetConstant(Hi20, DL, VT)),
3281 0);
3282 } else {
3283 Base = CurDAG->getRegister(RISCV::X0, VT);
3284 }
3285 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3286 return true;
3287 }
3288
3289 // Ask how constant materialization would handle this constant.
3290 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3291
3292 // If the last instruction would be an ADDI, we can fold its immediate and
3293 // emit the rest of the sequence as the base.
3294 if (Seq.back().getOpcode() != RISCV::ADDI)
3295 return false;
3296 Lo12 = Seq.back().getImm();
3297 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3298 return false;
3299
3300 // Drop the last instruction.
3301 Seq.pop_back();
3302 assert(!Seq.empty() && "Expected more instructions in sequence");
3303
3304 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3305 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3306 return true;
3307}
3308
3309// Is this ADD instruction only used as the base pointer of scalar loads and
3310// stores?
3312 for (auto *User : Add->users()) {
3313 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3314 User->getOpcode() != RISCVISD::LD_RV32 &&
3315 User->getOpcode() != RISCVISD::SD_RV32 &&
3316 User->getOpcode() != ISD::ATOMIC_LOAD &&
3317 User->getOpcode() != ISD::ATOMIC_STORE)
3318 return false;
3319 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3320 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3321 VT != MVT::f64)
3322 return false;
3323 // Don't allow stores of the value. It must be used as the address.
3324 if (User->getOpcode() == ISD::STORE &&
3325 cast<StoreSDNode>(User)->getValue() == Add)
3326 return false;
3327 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3328 cast<AtomicSDNode>(User)->getVal() == Add)
3329 return false;
3330 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3331 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3332 return false;
3333 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3334 return false;
3335 }
3336
3337 return true;
3338}
3339
3341 switch (User->getOpcode()) {
3342 default:
3343 return false;
3344 case ISD::LOAD:
3345 case RISCVISD::LD_RV32:
3346 case ISD::ATOMIC_LOAD:
3347 break;
3348 case ISD::STORE:
3349 // Don't allow stores of Add. It must only be used as the address.
3350 if (cast<StoreSDNode>(User)->getValue() == Add)
3351 return false;
3352 break;
3353 case RISCVISD::SD_RV32:
3354 // Don't allow stores of Add. It must only be used as the address.
3355 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3356 return false;
3357 break;
3358 case ISD::ATOMIC_STORE:
3359 // Don't allow stores of Add. It must only be used as the address.
3360 if (cast<AtomicSDNode>(User)->getVal() == Add)
3361 return false;
3362 break;
3363 }
3364
3365 return true;
3366}
3367
3368// To prevent SelectAddrRegImm from folding offsets that conflict with the
3369// fusion of PseudoMovAddr, check if the offset of every use of a given address
3370// is within the alignment.
3372 Align Alignment) {
3373 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3374 for (auto *User : Addr->users()) {
3375 // If the user is a load or store, then the offset is 0 which is always
3376 // within alignment.
3377 if (isRegImmLoadOrStore(User, Addr))
3378 continue;
3379
3380 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3381 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3382 if (!isInt<12>(CVal) || Alignment <= CVal)
3383 return false;
3384
3385 // Make sure all uses are foldable load/stores.
3386 for (auto *AddUser : User->users())
3387 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3388 return false;
3389
3390 continue;
3391 }
3392
3393 return false;
3394 }
3395
3396 return true;
3397}
3398
3400 SDValue &Offset) {
3401 if (SelectAddrFrameIndex(Addr, Base, Offset))
3402 return true;
3403
3404 SDLoc DL(Addr);
3405 MVT VT = Addr.getSimpleValueType();
3406
3407 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3408 bool CanFold = true;
3409 // Unconditionally fold if operand 1 is not a global address (e.g.
3410 // externsymbol)
3411 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3412 const DataLayout &DL = CurDAG->getDataLayout();
3413 Align Alignment = commonAlignment(
3414 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3415 if (!areOffsetsWithinAlignment(Addr, Alignment))
3416 CanFold = false;
3417 }
3418 if (CanFold) {
3419 Base = Addr.getOperand(0);
3420 Offset = Addr.getOperand(1);
3421 return true;
3422 }
3423 }
3424
3425 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3426 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3427 if (isInt<12>(CVal)) {
3428 Base = Addr.getOperand(0);
3429 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3430 SDValue LoOperand = Base.getOperand(1);
3431 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3432 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3433 // (its low part, really), then we can rely on the alignment of that
3434 // variable to provide a margin of safety before low part can overflow
3435 // the 12 bits of the load/store offset. Check if CVal falls within
3436 // that margin; if so (low part + CVal) can't overflow.
3437 const DataLayout &DL = CurDAG->getDataLayout();
3438 Align Alignment = commonAlignment(
3439 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3440 if ((CVal == 0 || Alignment > CVal) &&
3441 areOffsetsWithinAlignment(Base, Alignment)) {
3442 int64_t CombinedOffset = CVal + GA->getOffset();
3443 Base = Base.getOperand(0);
3444 Offset = CurDAG->getTargetGlobalAddress(
3445 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3446 CombinedOffset, GA->getTargetFlags());
3447 return true;
3448 }
3449 }
3450 }
3451
3452 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3453 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3454 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3455 return true;
3456 }
3457 }
3458
3459 // Handle ADD with large immediates.
3460 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3461 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3462 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3463
3464 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3465 // an ADDI for part of the offset and fold the rest into the load/store.
3466 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
3467 if (CVal >= -4096 && CVal <= 4094) {
3468 int64_t Adj = CVal < 0 ? -2048 : 2047;
3469 Base = SDValue(
3470 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3471 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3472 0);
3473 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3474 return true;
3475 }
3476
3477 // For larger immediates, we might be able to save one instruction from
3478 // constant materialization by folding the Lo12 bits of the immediate into
3479 // the address. We should only do this if the ADD is only used by loads and
3480 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3481 // separately with the full materialized immediate creating extra
3482 // instructions.
3483 if (isWorthFoldingAdd(Addr) &&
3484 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3485 Offset, /*IsPrefetch=*/false)) {
3486 // Insert an ADD instruction with the materialized Hi52 bits.
3487 Base = SDValue(
3488 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3489 0);
3490 return true;
3491 }
3492 }
3493
3494 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3495 /*IsPrefetch=*/false))
3496 return true;
3497
3498 Base = Addr;
3499 Offset = CurDAG->getTargetConstant(0, DL, VT);
3500 return true;
3501}
3502
3503/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3505 SDValue &Offset) {
3506 if (SelectAddrFrameIndex(Addr, Base, Offset))
3507 return true;
3508
3509 SDLoc DL(Addr);
3510 MVT VT = Addr.getSimpleValueType();
3511
3512 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3513 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3514 if (isUInt<9>(CVal)) {
3515 Base = Addr.getOperand(0);
3516
3517 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3518 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3519 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3520 return true;
3521 }
3522 }
3523
3524 Base = Addr;
3525 Offset = CurDAG->getTargetConstant(0, DL, VT);
3526 return true;
3527}
3528
3529/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3530/// Offset should be all zeros.
3532 SDValue &Offset) {
3533 if (SelectAddrFrameIndex(Addr, Base, Offset))
3534 return true;
3535
3536 SDLoc DL(Addr);
3537 MVT VT = Addr.getSimpleValueType();
3538
3539 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3540 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3541 if (isInt<12>(CVal)) {
3542 Base = Addr.getOperand(0);
3543
3544 // Early-out if not a valid offset.
3545 if ((CVal & 0b11111) != 0) {
3546 Base = Addr;
3547 Offset = CurDAG->getTargetConstant(0, DL, VT);
3548 return true;
3549 }
3550
3551 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3552 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3553 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3554 return true;
3555 }
3556 }
3557
3558 // Handle ADD with large immediates.
3559 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3560 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3561 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3562
3563 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3564 // one instruction by folding adjustment (-2048 or 2016) into the address.
3565 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3566 int64_t Adj = CVal < 0 ? -2048 : 2016;
3567 int64_t AdjustedOffset = CVal - Adj;
3568 Base =
3569 SDValue(CurDAG->getMachineNode(
3570 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3571 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3572 0);
3573 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3574 return true;
3575 }
3576
3577 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3578 Offset, /*IsPrefetch=*/true)) {
3579 // Insert an ADD instruction with the materialized Hi52 bits.
3580 Base = SDValue(
3581 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3582 0);
3583 return true;
3584 }
3585 }
3586
3587 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3588 /*IsPrefetch=*/true))
3589 return true;
3590
3591 Base = Addr;
3592 Offset = CurDAG->getTargetConstant(0, DL, VT);
3593 return true;
3594}
3595
3596/// Return true if this a load/store that we have a RegRegScale instruction for.
3598 const RISCVSubtarget &Subtarget) {
3599 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3600 return false;
3601 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3602 if (!(VT.isScalarInteger() &&
3603 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3604 !((VT == MVT::f32 || VT == MVT::f64) &&
3605 Subtarget.hasVendorXTHeadFMemIdx()))
3606 return false;
3607 // Don't allow stores of the value. It must be used as the address.
3608 if (User->getOpcode() == ISD::STORE &&
3609 cast<StoreSDNode>(User)->getValue() == Add)
3610 return false;
3611
3612 return true;
3613}
3614
3615/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3616/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3617/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3618/// single addi and we don't have a SHXADD instruction we could use.
3619/// FIXME: May still need to check how many and what kind of users the SHL has.
3621 SDValue Add,
3622 SDValue Shift = SDValue()) {
3623 bool FoundADDI = false;
3624 for (auto *User : Add->users()) {
3625 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3626 continue;
3627
3628 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3629 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3631 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3632 return false;
3633
3634 FoundADDI = true;
3635
3636 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3637 assert(Shift.getOpcode() == ISD::SHL);
3638 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3639 if (Subtarget.hasShlAdd(ShiftAmt))
3640 return false;
3641
3642 // All users of the ADDI should be load/store.
3643 for (auto *ADDIUser : User->users())
3644 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3645 return false;
3646 }
3647
3648 return true;
3649}
3650
3652 unsigned MaxShiftAmount,
3653 SDValue &Base, SDValue &Index,
3654 SDValue &Scale) {
3655 if (Addr.getOpcode() != ISD::ADD)
3656 return false;
3657 SDValue LHS = Addr.getOperand(0);
3658 SDValue RHS = Addr.getOperand(1);
3659
3660 EVT VT = Addr.getSimpleValueType();
3661 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3662 SDValue &Shift) {
3663 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3664 return false;
3665
3666 // Only match shifts by a value in range [0, MaxShiftAmount].
3667 unsigned ShiftAmt = N.getConstantOperandVal(1);
3668 if (ShiftAmt > MaxShiftAmount)
3669 return false;
3670
3671 Index = N.getOperand(0);
3672 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3673 return true;
3674 };
3675
3676 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3677 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3678 if (LHS.getOpcode() == ISD::ADD &&
3679 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3680 isInt<12>(C1->getSExtValue())) {
3681 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3682 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3683 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3684 SDLoc(Addr), VT);
3685 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3686 LHS.getOperand(0), C1Val),
3687 0);
3688 return true;
3689 }
3690
3691 // Add is commutative so we need to check both operands.
3692 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3693 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3694 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3695 SDLoc(Addr), VT);
3696 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3697 LHS.getOperand(1), C1Val),
3698 0);
3699 return true;
3700 }
3701 }
3702
3703 // Don't match add with constants.
3704 // FIXME: Is this profitable for large constants that have 0s in the lower
3705 // 12 bits that we can materialize with LUI?
3706 return false;
3707 }
3708
3709 // Try to match a shift on the RHS.
3710 if (SelectShl(RHS, Index, Scale)) {
3711 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3712 return false;
3713 Base = LHS;
3714 return true;
3715 }
3716
3717 // Try to match a shift on the LHS.
3718 if (SelectShl(LHS, Index, Scale)) {
3719 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3720 return false;
3721 Base = RHS;
3722 return true;
3723 }
3724
3725 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3726 return false;
3727
3728 Base = LHS;
3729 Index = RHS;
3730 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3731 return true;
3732}
3733
3735 unsigned MaxShiftAmount,
3736 unsigned Bits, SDValue &Base,
3737 SDValue &Index,
3738 SDValue &Scale) {
3739 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3740 return false;
3741
3742 if (Index.getOpcode() == ISD::AND) {
3743 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3744 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3745 Index = Index.getOperand(0);
3746 return true;
3747 }
3748 }
3749
3750 return false;
3751}
3752
3754 SDValue &Offset) {
3755 if (Addr.getOpcode() != ISD::ADD)
3756 return false;
3757
3758 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3759 return false;
3760
3761 Base = Addr.getOperand(0);
3762 Offset = Addr.getOperand(1);
3763 return true;
3764}
3765
3767 SDValue &ShAmt) {
3768 ShAmt = N;
3769
3770 // Peek through zext.
3771 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3772 ShAmt = ShAmt.getOperand(0);
3773
3774 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3775 // amount. If there is an AND on the shift amount, we can bypass it if it
3776 // doesn't affect any of those bits.
3777 if (ShAmt.getOpcode() == ISD::AND &&
3778 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3779 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3780
3781 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3782 // mask that covers the bits needed to represent all shift amounts.
3783 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3784 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3785
3786 if (ShMask.isSubsetOf(AndMask)) {
3787 ShAmt = ShAmt.getOperand(0);
3788 } else {
3789 // SimplifyDemandedBits may have optimized the mask so try restoring any
3790 // bits that are known zero.
3791 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3792 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3793 return true;
3794 ShAmt = ShAmt.getOperand(0);
3795 }
3796 }
3797
3798 if (ShAmt.getOpcode() == ISD::ADD &&
3799 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3800 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3801 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3802 // to avoid the ADD.
3803 if (Imm != 0 && Imm % ShiftWidth == 0) {
3804 ShAmt = ShAmt.getOperand(0);
3805 return true;
3806 }
3807 } else if (ShAmt.getOpcode() == ISD::SUB &&
3808 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3809 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3810 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3811 // generate a NEG instead of a SUB of a constant.
3812 if (Imm != 0 && Imm % ShiftWidth == 0) {
3813 SDLoc DL(ShAmt);
3814 EVT VT = ShAmt.getValueType();
3815 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3816 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3817 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3818 ShAmt.getOperand(1));
3819 ShAmt = SDValue(Neg, 0);
3820 return true;
3821 }
3822 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3823 // to generate a NOT instead of a SUB of a constant.
3824 if (Imm % ShiftWidth == ShiftWidth - 1) {
3825 SDLoc DL(ShAmt);
3826 EVT VT = ShAmt.getValueType();
3827 MachineSDNode *Not = CurDAG->getMachineNode(
3828 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3829 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3830 ShAmt = SDValue(Not, 0);
3831 return true;
3832 }
3833 }
3834
3835 return true;
3836}
3837
3838/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3839/// check for equality with 0. This function emits instructions that convert the
3840/// seteq/setne into something that can be compared with 0.
3841/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3842/// ISD::SETNE).
3844 SDValue &Val) {
3845 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3846 "Unexpected condition code!");
3847
3848 // We're looking for a setcc.
3849 if (N->getOpcode() != ISD::SETCC)
3850 return false;
3851
3852 // Must be an equality comparison.
3853 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3854 if (CCVal != ExpectedCCVal)
3855 return false;
3856
3857 SDValue LHS = N->getOperand(0);
3858 SDValue RHS = N->getOperand(1);
3859
3860 if (!LHS.getValueType().isScalarInteger())
3861 return false;
3862
3863 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
3864 if (isNullConstant(RHS)) {
3865 Val = LHS;
3866 return true;
3867 }
3868
3869 SDLoc DL(N);
3870
3871 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3872 int64_t CVal = C->getSExtValue();
3873 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3874 // non-zero otherwise.
3875 if (CVal == -2048) {
3876 Val = SDValue(
3877 CurDAG->getMachineNode(
3878 RISCV::XORI, DL, N->getValueType(0), LHS,
3879 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3880 0);
3881 return true;
3882 }
3883 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3884 // if the LHS is equal to the RHS and non-zero otherwise.
3885 if (isInt<12>(CVal) || CVal == 2048) {
3886 unsigned Opc = RISCV::ADDI;
3887 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3888 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3889 Opc = RISCV::ADDIW;
3890 LHS = LHS.getOperand(0);
3891 }
3892
3893 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3894 CurDAG->getSignedTargetConstant(
3895 -CVal, DL, N->getValueType(0))),
3896 0);
3897 return true;
3898 }
3899 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3900 Val = SDValue(
3901 CurDAG->getMachineNode(
3902 RISCV::BINVI, DL, N->getValueType(0), LHS,
3903 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3904 0);
3905 return true;
3906 }
3907 // Same as the addi case above but for larger immediates (signed 26-bit) use
3908 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3909 // anything which can be done with a single lui as it might be compressible.
3910 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3911 (CVal & 0xFFF) != 0) {
3912 Val = SDValue(
3913 CurDAG->getMachineNode(
3914 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3915 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3916 0);
3917 return true;
3918 }
3919 }
3920
3921 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3922 // equal and a non-zero value if they aren't.
3923 Val = SDValue(
3924 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3925 return true;
3926}
3927
3929 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3930 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3931 Val = N.getOperand(0);
3932 return true;
3933 }
3934
3935 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3936 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3937 return N;
3938
3939 SDValue N0 = N.getOperand(0);
3940 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3941 N.getConstantOperandVal(1) == ShiftAmt &&
3942 N0.getConstantOperandVal(1) == ShiftAmt)
3943 return N0.getOperand(0);
3944
3945 return N;
3946 };
3947
3948 MVT VT = N.getSimpleValueType();
3949 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3950 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3951 return true;
3952 }
3953
3954 return false;
3955}
3956
3958 if (N.getOpcode() == ISD::AND) {
3959 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3960 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3961 Val = N.getOperand(0);
3962 return true;
3963 }
3964 }
3965 MVT VT = N.getSimpleValueType();
3966 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3967 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3968 Val = N;
3969 return true;
3970 }
3971
3972 return false;
3973}
3974
3975/// Look for various patterns that can be done with a SHL that can be folded
3976/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3977/// SHXADD we are trying to match.
3979 SDValue &Val) {
3980 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3981 SDValue N0 = N.getOperand(0);
3982
3983 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3984 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3986 uint64_t Mask = N.getConstantOperandVal(1);
3987 unsigned C2 = N0.getConstantOperandVal(1);
3988
3989 unsigned XLen = Subtarget->getXLen();
3990 if (LeftShift)
3991 Mask &= maskTrailingZeros<uint64_t>(C2);
3992 else
3993 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3994
3995 if (isShiftedMask_64(Mask)) {
3996 unsigned Leading = XLen - llvm::bit_width(Mask);
3997 unsigned Trailing = llvm::countr_zero(Mask);
3998 if (Trailing != ShAmt)
3999 return false;
4000
4001 unsigned Opcode;
4002 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
4003 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
4004 // followed by a SHXADD with c3 for the X amount.
4005 if (LeftShift && Leading == 0 && C2 < Trailing)
4006 Opcode = RISCV::SRLI;
4007 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
4008 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
4009 // followed by a SHXADD with c3 for the X amount.
4010 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
4011 Opcode = RISCV::SRLIW;
4012 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
4013 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
4014 // followed by a SHXADD using c3 for the X amount.
4015 else if (!LeftShift && Leading == C2)
4016 Opcode = RISCV::SRLI;
4017 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
4018 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
4019 // followed by a SHXADD using c3 for the X amount.
4020 else if (!LeftShift && Leading == 32 + C2)
4021 Opcode = RISCV::SRLIW;
4022 else
4023 return false;
4024
4025 SDLoc DL(N);
4026 EVT VT = N.getValueType();
4027 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
4028 Val = SDValue(
4029 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
4030 CurDAG->getTargetConstant(ShAmt, DL, VT)),
4031 0);
4032 return true;
4033 }
4034 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
4036 uint64_t Mask = N.getConstantOperandVal(1);
4037 unsigned C2 = N0.getConstantOperandVal(1);
4038
4039 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
4040 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
4041 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
4042 // the X amount.
4043 if (isShiftedMask_64(Mask)) {
4044 unsigned XLen = Subtarget->getXLen();
4045 unsigned Leading = XLen - llvm::bit_width(Mask);
4046 unsigned Trailing = llvm::countr_zero(Mask);
4047 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
4048 SDLoc DL(N);
4049 EVT VT = N.getValueType();
4050 Val = SDValue(CurDAG->getMachineNode(
4051 RISCV::SRAI, DL, VT, N0.getOperand(0),
4052 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
4053 0);
4054 Val = SDValue(CurDAG->getMachineNode(
4055 RISCV::SRLI, DL, VT, Val,
4056 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
4057 0);
4058 return true;
4059 }
4060 }
4061 }
4062 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
4063 (LeftShift || N.getOpcode() == ISD::SRL) &&
4064 isa<ConstantSDNode>(N.getOperand(1))) {
4065 SDValue N0 = N.getOperand(0);
4066 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
4068 uint64_t Mask = N0.getConstantOperandVal(1);
4069 if (isShiftedMask_64(Mask)) {
4070 unsigned C1 = N.getConstantOperandVal(1);
4071 unsigned XLen = Subtarget->getXLen();
4072 unsigned Leading = XLen - llvm::bit_width(Mask);
4073 unsigned Trailing = llvm::countr_zero(Mask);
4074 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
4075 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
4076 if (LeftShift && Leading == 32 && Trailing > 0 &&
4077 (Trailing + C1) == ShAmt) {
4078 SDLoc DL(N);
4079 EVT VT = N.getValueType();
4080 Val = SDValue(CurDAG->getMachineNode(
4081 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4082 CurDAG->getTargetConstant(Trailing, DL, VT)),
4083 0);
4084 return true;
4085 }
4086 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
4087 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
4088 if (!LeftShift && Leading == 32 && Trailing > C1 &&
4089 (Trailing - C1) == ShAmt) {
4090 SDLoc DL(N);
4091 EVT VT = N.getValueType();
4092 Val = SDValue(CurDAG->getMachineNode(
4093 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4094 CurDAG->getTargetConstant(Trailing, DL, VT)),
4095 0);
4096 return true;
4097 }
4098 }
4099 }
4100 }
4101
4102 return false;
4103}
4104
4105/// Look for various patterns that can be done with a SHL that can be folded
4106/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
4107/// SHXADD_UW we are trying to match.
4109 SDValue &Val) {
4110 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
4111 N.hasOneUse()) {
4112 SDValue N0 = N.getOperand(0);
4113 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
4114 N0.hasOneUse()) {
4115 uint64_t Mask = N.getConstantOperandVal(1);
4116 unsigned C2 = N0.getConstantOperandVal(1);
4117
4118 Mask &= maskTrailingZeros<uint64_t>(C2);
4119
4120 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
4121 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
4122 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
4123 if (isShiftedMask_64(Mask)) {
4124 unsigned Leading = llvm::countl_zero(Mask);
4125 unsigned Trailing = llvm::countr_zero(Mask);
4126 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
4127 SDLoc DL(N);
4128 EVT VT = N.getValueType();
4129 Val = SDValue(CurDAG->getMachineNode(
4130 RISCV::SLLI, DL, VT, N0.getOperand(0),
4131 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
4132 0);
4133 return true;
4134 }
4135 }
4136 }
4137 }
4138
4139 return false;
4140}
4141
4143 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
4144 if (N->getFlags().hasDisjoint())
4145 return true;
4146 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
4147}
4148
4149bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4150 SDValue N, SDValue &Val) {
4151 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
4152 /*CompressionCost=*/true);
4153 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
4154 /*CompressionCost=*/true);
4155 if (OrigCost <= Cost)
4156 return false;
4157
4158 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
4159 return true;
4160}
4161
4163 if (!isa<ConstantSDNode>(N))
4164 return false;
4165 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4166 if ((Imm >> 31) != 1)
4167 return false;
4168
4169 for (const SDNode *U : N->users()) {
4170 switch (U->getOpcode()) {
4171 case ISD::ADD:
4172 break;
4173 case ISD::OR:
4174 if (orDisjoint(U))
4175 break;
4176 return false;
4177 default:
4178 return false;
4179 }
4180 }
4181
4182 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
4183}
4184
4186 if (!isa<ConstantSDNode>(N))
4187 return false;
4188 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4189 if (isInt<32>(Imm))
4190 return false;
4191 if (Imm == INT64_MIN)
4192 return false;
4193
4194 for (const SDNode *U : N->users()) {
4195 switch (U->getOpcode()) {
4196 case ISD::ADD:
4197 break;
4198 case RISCVISD::VMV_V_X_VL:
4199 if (!all_of(U->users(), [](const SDNode *V) {
4200 return V->getOpcode() == ISD::ADD ||
4201 V->getOpcode() == RISCVISD::ADD_VL;
4202 }))
4203 return false;
4204 break;
4205 default:
4206 return false;
4207 }
4208 }
4209
4210 return selectImm64IfCheaper(-Imm, Imm, N, Val);
4211}
4212
4214 if (!isa<ConstantSDNode>(N))
4215 return false;
4216 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4217
4218 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
4219 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
4220 return false;
4221
4222 // Abandon this transform if the constant is needed elsewhere.
4223 for (const SDNode *U : N->users()) {
4224 switch (U->getOpcode()) {
4225 case ISD::AND:
4226 case ISD::OR:
4227 case ISD::XOR:
4228 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
4229 return false;
4230 break;
4231 case RISCVISD::VMV_V_X_VL:
4232 if (!Subtarget->hasStdExtZvkb())
4233 return false;
4234 if (!all_of(U->users(), [](const SDNode *V) {
4235 return V->getOpcode() == ISD::AND ||
4236 V->getOpcode() == RISCVISD::AND_VL;
4237 }))
4238 return false;
4239 break;
4240 default:
4241 return false;
4242 }
4243 }
4244
4245 if (isInt<32>(Imm)) {
4246 Val =
4247 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
4248 return true;
4249 }
4250
4251 // For 64-bit constants, the instruction sequences get complex,
4252 // so we select inverted only if it's cheaper.
4253 return selectImm64IfCheaper(~Imm, Imm, N, Val);
4254}
4255
4256static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4257 unsigned Bits,
4258 const TargetInstrInfo *TII) {
4259 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
4260
4261 if (!MCOpcode)
4262 return false;
4263
4264 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
4265 const uint64_t TSFlags = MCID.TSFlags;
4266 if (!RISCVII::hasSEWOp(TSFlags))
4267 return false;
4268 assert(RISCVII::hasVLOp(TSFlags));
4269
4270 unsigned ChainOpIdx = User->getNumOperands() - 1;
4271 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
4272 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4273 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4274 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4275
4276 if (UserOpNo == VLIdx)
4277 return false;
4278
4279 auto NumDemandedBits =
4280 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4281 return NumDemandedBits && Bits >= *NumDemandedBits;
4282}
4283
4284// Return true if all users of this SDNode* only consume the lower \p Bits.
4285// This can be used to form W instructions for add/sub/mul/shl even when the
4286// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4287// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4288// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4289// the add/sub/mul/shl to become non-W instructions. By checking the users we
4290// may be able to use a W instruction and CSE with the other instruction if
4291// this has happened. We could try to detect that the CSE opportunity exists
4292// before doing this, but that would be more complicated.
4294 const unsigned Depth) const {
4295 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4296 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4297 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4298 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4299 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4300 isa<ConstantSDNode>(Node) || Depth != 0) &&
4301 "Unexpected opcode");
4302
4304 return false;
4305
4306 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4307 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4308 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4309 return false;
4310
4311 for (SDUse &Use : Node->uses()) {
4312 SDNode *User = Use.getUser();
4313 // Users of this node should have already been instruction selected
4314 if (!User->isMachineOpcode())
4315 return false;
4316
4317 // TODO: Add more opcodes?
4318 switch (User->getMachineOpcode()) {
4319 default:
4321 break;
4322 return false;
4323 case RISCV::ADDW:
4324 case RISCV::ADDIW:
4325 case RISCV::SUBW:
4326 case RISCV::MULW:
4327 case RISCV::SLLW:
4328 case RISCV::SLLIW:
4329 case RISCV::SRAW:
4330 case RISCV::SRAIW:
4331 case RISCV::SRLW:
4332 case RISCV::SRLIW:
4333 case RISCV::DIVW:
4334 case RISCV::DIVUW:
4335 case RISCV::REMW:
4336 case RISCV::REMUW:
4337 case RISCV::ROLW:
4338 case RISCV::RORW:
4339 case RISCV::RORIW:
4340 case RISCV::CLSW:
4341 case RISCV::CLZW:
4342 case RISCV::CTZW:
4343 case RISCV::CPOPW:
4344 case RISCV::SLLI_UW:
4345 case RISCV::ABSW:
4346 case RISCV::FMV_W_X:
4347 case RISCV::FCVT_H_W:
4348 case RISCV::FCVT_H_W_INX:
4349 case RISCV::FCVT_H_WU:
4350 case RISCV::FCVT_H_WU_INX:
4351 case RISCV::FCVT_S_W:
4352 case RISCV::FCVT_S_W_INX:
4353 case RISCV::FCVT_S_WU:
4354 case RISCV::FCVT_S_WU_INX:
4355 case RISCV::FCVT_D_W:
4356 case RISCV::FCVT_D_W_INX:
4357 case RISCV::FCVT_D_WU:
4358 case RISCV::FCVT_D_WU_INX:
4359 case RISCV::TH_REVW:
4360 case RISCV::TH_SRRIW:
4361 if (Bits >= 32)
4362 break;
4363 return false;
4364 case RISCV::SLL:
4365 case RISCV::SRA:
4366 case RISCV::SRL:
4367 case RISCV::ROL:
4368 case RISCV::ROR:
4369 case RISCV::BSET:
4370 case RISCV::BCLR:
4371 case RISCV::BINV:
4372 // Shift amount operands only use log2(Xlen) bits.
4373 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4374 break;
4375 return false;
4376 case RISCV::SLLI:
4377 // SLLI only uses the lower (XLen - ShAmt) bits.
4378 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4379 break;
4380 return false;
4381 case RISCV::ANDI:
4382 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4383 break;
4384 goto RecCheck;
4385 case RISCV::ORI: {
4386 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4387 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4388 break;
4389 [[fallthrough]];
4390 }
4391 case RISCV::AND:
4392 case RISCV::OR:
4393 case RISCV::XOR:
4394 case RISCV::XORI:
4395 case RISCV::ANDN:
4396 case RISCV::ORN:
4397 case RISCV::XNOR:
4398 case RISCV::SH1ADD:
4399 case RISCV::SH2ADD:
4400 case RISCV::SH3ADD:
4401 RecCheck:
4402 if (hasAllNBitUsers(User, Bits, Depth + 1))
4403 break;
4404 return false;
4405 case RISCV::SRLI: {
4406 unsigned ShAmt = User->getConstantOperandVal(1);
4407 // If we are shifting right by less than Bits, and users don't demand any
4408 // bits that were shifted into [Bits-1:0], then we can consider this as an
4409 // N-Bit user.
4410 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4411 break;
4412 return false;
4413 }
4414 case RISCV::SEXT_B:
4415 case RISCV::PACKH:
4416 if (Bits >= 8)
4417 break;
4418 return false;
4419 case RISCV::SEXT_H:
4420 case RISCV::FMV_H_X:
4421 case RISCV::ZEXT_H_RV32:
4422 case RISCV::ZEXT_H_RV64:
4423 case RISCV::PACKW:
4424 if (Bits >= 16)
4425 break;
4426 return false;
4427 case RISCV::PACK:
4428 if (Bits >= (Subtarget->getXLen() / 2))
4429 break;
4430 return false;
4431 case RISCV::PPAIRE_H:
4432 // If only the lower 32-bits of the result are used, then only the
4433 // lower 16 bits of the inputs are used.
4434 if (Bits >= 16 && hasAllNBitUsers(User, 32, Depth + 1))
4435 break;
4436 return false;
4437 case RISCV::ADD_UW:
4438 case RISCV::SH1ADD_UW:
4439 case RISCV::SH2ADD_UW:
4440 case RISCV::SH3ADD_UW:
4441 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4442 // 32 bits.
4443 if (Use.getOperandNo() == 0 && Bits >= 32)
4444 break;
4445 return false;
4446 case RISCV::SB:
4447 if (Use.getOperandNo() == 0 && Bits >= 8)
4448 break;
4449 return false;
4450 case RISCV::SH:
4451 if (Use.getOperandNo() == 0 && Bits >= 16)
4452 break;
4453 return false;
4454 case RISCV::SW:
4455 if (Use.getOperandNo() == 0 && Bits >= 32)
4456 break;
4457 return false;
4458 case RISCV::TH_EXT:
4459 case RISCV::TH_EXTU: {
4460 unsigned Msb = User->getConstantOperandVal(1);
4461 unsigned Lsb = User->getConstantOperandVal(2);
4462 // Behavior of Msb < Lsb is not well documented.
4463 if (Msb >= Lsb && Bits > Msb)
4464 break;
4465 return false;
4466 }
4467 }
4468 }
4469
4470 return true;
4471}
4472
4473// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4475 SDValue &Shl2) {
4476 auto *C = dyn_cast<ConstantSDNode>(N);
4477 if (!C)
4478 return false;
4479
4480 int64_t Offset = C->getSExtValue();
4481 for (unsigned Shift = 0; Shift < 4; Shift++) {
4482 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4483 EVT VT = N->getValueType(0);
4484 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4485 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4486 return true;
4487 }
4488 }
4489
4490 return false;
4491}
4492
4493// Select VL as a 5 bit immediate or a value that will become a register. This
4494// allows us to choose between VSETIVLI or VSETVLI later.
4496 auto *C = dyn_cast<ConstantSDNode>(N);
4497 if (C && isUInt<5>(C->getZExtValue())) {
4498 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4499 N->getValueType(0));
4500 } else if (C && C->isAllOnes()) {
4501 // Treat all ones as VLMax.
4502 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4503 N->getValueType(0));
4504 } else if (isa<RegisterSDNode>(N) &&
4505 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4506 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4507 // as the register class. Convert X0 to a special immediate to pass the
4508 // MachineVerifier. This is recognized specially by the vsetvli insertion
4509 // pass.
4510 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4511 N->getValueType(0));
4512 } else {
4513 VL = N;
4514 }
4515
4516 return true;
4517}
4518
4520 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4521 if (!N.getOperand(0).isUndef())
4522 return SDValue();
4523 N = N.getOperand(1);
4524 }
4525 SDValue Splat = N;
4526 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4527 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4528 !Splat.getOperand(0).isUndef())
4529 return SDValue();
4530 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4531 return Splat;
4532}
4533
4536 if (!Splat)
4537 return false;
4538
4539 SplatVal = Splat.getOperand(1);
4540 return true;
4541}
4542
4544 SelectionDAG &DAG,
4545 const RISCVSubtarget &Subtarget,
4546 std::function<bool(int64_t)> ValidateImm,
4547 bool Decrement = false) {
4549 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4550 return false;
4551
4552 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4553 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4554 "Unexpected splat operand type");
4555
4556 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4557 // type is wider than the resulting vector element type: an implicit
4558 // truncation first takes place. Therefore, perform a manual
4559 // truncation/sign-extension in order to ignore any truncated bits and catch
4560 // any zero-extended immediate.
4561 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4562 // sign-extending to (XLenVT -1).
4563 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4564
4565 int64_t SplatImm = SplatConst.getSExtValue();
4566
4567 if (!ValidateImm(SplatImm))
4568 return false;
4569
4570 if (Decrement)
4571 SplatImm -= 1;
4572
4573 SplatVal =
4574 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4575 return true;
4576}
4577
4579 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4580 [](int64_t Imm) { return isInt<5>(Imm); });
4581}
4582
4584 return selectVSplatImmHelper(
4585 N, SplatVal, *CurDAG, *Subtarget,
4586 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4587 /*Decrement=*/true);
4588}
4589
4591 return selectVSplatImmHelper(
4592 N, SplatVal, *CurDAG, *Subtarget,
4593 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4594 /*Decrement=*/false);
4595}
4596
4598 SDValue &SplatVal) {
4599 return selectVSplatImmHelper(
4600 N, SplatVal, *CurDAG, *Subtarget,
4601 [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4602 /*Decrement=*/true);
4603}
4604
4606 SDValue &SplatVal) {
4607 return selectVSplatImmHelper(
4608 N, SplatVal, *CurDAG, *Subtarget,
4609 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4610}
4611
4614 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4615}
4616
4618 auto IsExtOrTrunc = [](SDValue N) {
4619 switch (N->getOpcode()) {
4620 case ISD::SIGN_EXTEND:
4621 case ISD::ZERO_EXTEND:
4622 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4623 // inactive elements will be undef.
4624 case RISCVISD::TRUNCATE_VECTOR_VL:
4625 case RISCVISD::VSEXT_VL:
4626 case RISCVISD::VZEXT_VL:
4627 return true;
4628 default:
4629 return false;
4630 }
4631 };
4632
4633 // We can have multiple nested nodes, so unravel them all if needed.
4634 while (IsExtOrTrunc(N)) {
4635 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4636 return false;
4637 N = N->getOperand(0);
4638 }
4639
4640 return selectVSplat(N, SplatVal);
4641}
4642
4644 // Allow bitcasts from XLenVT -> FP.
4645 if (N.getOpcode() == ISD::BITCAST &&
4646 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4647 Imm = N.getOperand(0);
4648 return true;
4649 }
4650 // Allow moves from XLenVT to FP.
4651 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4652 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4653 Imm = N.getOperand(0);
4654 return true;
4655 }
4656
4657 // Otherwise, look for FP constants that can materialized with scalar int.
4659 if (!CFP)
4660 return false;
4661 const APFloat &APF = CFP->getValueAPF();
4662 // td can handle +0.0 already.
4663 if (APF.isPosZero())
4664 return false;
4665
4666 MVT VT = CFP->getSimpleValueType(0);
4667
4668 MVT XLenVT = Subtarget->getXLenVT();
4669 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4670 assert(APF.isNegZero() && "Unexpected constant.");
4671 return false;
4672 }
4673 SDLoc DL(N);
4674 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4675 *Subtarget);
4676 return true;
4677}
4678
4680 SDValue &Imm) {
4681 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4682 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4683
4684 if (!isInt<5>(ImmVal))
4685 return false;
4686
4687 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4688 Subtarget->getXLenVT());
4689 return true;
4690 }
4691
4692 return false;
4693}
4694
4695// Match XOR with a VMSET_VL operand. Return the other operand.
4697 if (N.getOpcode() != ISD::XOR)
4698 return false;
4699
4700 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4701 Res = N.getOperand(1);
4702 return true;
4703 }
4704
4705 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4706 Res = N.getOperand(0);
4707 return true;
4708 }
4709
4710 return false;
4711}
4712
4713// Match VMXOR_VL with a VMSET_VL operand. Making sure that that VL operand
4714// matches the parent's VL. Return the other operand of the VMXOR_VL.
4716 SDValue &Res) {
4717 if (N.getOpcode() != RISCVISD::VMXOR_VL)
4718 return false;
4719
4720 assert(Parent &&
4721 (Parent->getOpcode() == RISCVISD::VMAND_VL ||
4722 Parent->getOpcode() == RISCVISD::VMOR_VL ||
4723 Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
4724 "Unexpected parent");
4725
4726 // The VL should match the parent.
4727 if (Parent->getOperand(2) != N->getOperand(2))
4728 return false;
4729
4730 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4731 Res = N.getOperand(1);
4732 return true;
4733 }
4734
4735 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4736 Res = N.getOperand(0);
4737 return true;
4738 }
4739
4740 return false;
4741}
4742
4743// Try to remove sext.w if the input is a W instruction or can be made into
4744// a W instruction cheaply.
4745bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4746 // Look for the sext.w pattern, addiw rd, rs1, 0.
4747 if (N->getMachineOpcode() != RISCV::ADDIW ||
4748 !isNullConstant(N->getOperand(1)))
4749 return false;
4750
4751 SDValue N0 = N->getOperand(0);
4752 if (!N0.isMachineOpcode())
4753 return false;
4754
4755 switch (N0.getMachineOpcode()) {
4756 default:
4757 break;
4758 case RISCV::ADD:
4759 case RISCV::ADDI:
4760 case RISCV::SUB:
4761 case RISCV::MUL:
4762 case RISCV::SLLI: {
4763 // Convert sext.w+add/sub/mul to their W instructions. This will create
4764 // a new independent instruction. This improves latency.
4765 unsigned Opc;
4766 switch (N0.getMachineOpcode()) {
4767 default:
4768 llvm_unreachable("Unexpected opcode!");
4769 case RISCV::ADD: Opc = RISCV::ADDW; break;
4770 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4771 case RISCV::SUB: Opc = RISCV::SUBW; break;
4772 case RISCV::MUL: Opc = RISCV::MULW; break;
4773 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4774 }
4775
4776 SDValue N00 = N0.getOperand(0);
4777 SDValue N01 = N0.getOperand(1);
4778
4779 // Shift amount needs to be uimm5.
4780 if (N0.getMachineOpcode() == RISCV::SLLI &&
4781 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4782 break;
4783
4784 SDNode *Result =
4785 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4786 N00, N01);
4787 ReplaceUses(N, Result);
4788 return true;
4789 }
4790 case RISCV::ADDW:
4791 case RISCV::ADDIW:
4792 case RISCV::SUBW:
4793 case RISCV::MULW:
4794 case RISCV::SLLIW:
4795 case RISCV::PACKW:
4796 case RISCV::TH_MULAW:
4797 case RISCV::TH_MULAH:
4798 case RISCV::TH_MULSW:
4799 case RISCV::TH_MULSH:
4800 if (N0.getValueType() == MVT::i32)
4801 break;
4802
4803 // Result is already sign extended just remove the sext.w.
4804 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4805 ReplaceUses(N, N0.getNode());
4806 return true;
4807 }
4808
4809 return false;
4810}
4811
4812static bool usesAllOnesMask(SDValue MaskOp) {
4813 const auto IsVMSet = [](unsigned Opc) {
4814 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4815 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4816 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4817 Opc == RISCV::PseudoVMSET_M_B8;
4818 };
4819
4820 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4821 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4822 // assume that it's all-ones? Same applies to its VL.
4823 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4824}
4825
4826static bool isImplicitDef(SDValue V) {
4827 if (!V.isMachineOpcode())
4828 return false;
4829 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4830 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4831 if (!isImplicitDef(V.getOperand(I)))
4832 return false;
4833 return true;
4834 }
4835 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4836}
4837
4838// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4839// corresponding "unmasked" pseudo versions.
4840bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4841 const RISCV::RISCVMaskedPseudoInfo *I =
4842 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4843 if (!I)
4844 return false;
4845
4846 unsigned MaskOpIdx = I->MaskOpIdx;
4847 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4848 return false;
4849
4850 // There are two classes of pseudos in the table - compares and
4851 // everything else. See the comment on RISCVMaskedPseudo for details.
4852 const unsigned Opc = I->UnmaskedPseudo;
4853 const MCInstrDesc &MCID = TII->get(Opc);
4854 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4855
4856 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4857 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4858
4859 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4861 "Unmasked pseudo has policy but masked pseudo doesn't?");
4862 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4863 "Unexpected pseudo structure");
4864 assert(!(HasPassthru && !MaskedHasPassthru) &&
4865 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4866
4868 // Skip the passthru operand at index 0 if the unmasked don't have one.
4869 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4870 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4871 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4872 bool HasChainOp =
4873 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4874 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4875 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4876 // Skip the mask
4877 SDValue Op = N->getOperand(I);
4878 if (I == MaskOpIdx)
4879 continue;
4880 if (DropPolicy && I == LastOpNum)
4881 continue;
4882 Ops.push_back(Op);
4883 }
4884
4885 MachineSDNode *Result =
4886 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4887
4888 if (!N->memoperands_empty())
4889 CurDAG->setNodeMemRefs(Result, N->memoperands());
4890
4891 Result->setFlags(N->getFlags());
4892 ReplaceUses(N, Result);
4893
4894 return true;
4895}
4896
4897/// If our passthru is an implicit_def, use noreg instead. This side
4898/// steps issues with MachineCSE not being able to CSE expressions with
4899/// IMPLICIT_DEF operands while preserving the semantic intent. See
4900/// pr64282 for context. Note that this transform is the last one
4901/// performed at ISEL DAG to DAG.
4902bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4903 bool MadeChange = false;
4904 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4905
4906 while (Position != CurDAG->allnodes_begin()) {
4907 SDNode *N = &*--Position;
4908 if (N->use_empty() || !N->isMachineOpcode())
4909 continue;
4910
4911 const unsigned Opc = N->getMachineOpcode();
4912 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4914 !isImplicitDef(N->getOperand(0)))
4915 continue;
4916
4918 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4919 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4920 SDValue Op = N->getOperand(I);
4921 Ops.push_back(Op);
4922 }
4923
4924 MachineSDNode *Result =
4925 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4926 Result->setFlags(N->getFlags());
4927 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4928 ReplaceUses(N, Result);
4929 MadeChange = true;
4930 }
4931 return MadeChange;
4932}
4933
4934
4935// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4936// for instruction scheduling.
4938 CodeGenOptLevel OptLevel) {
4939 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4940}
4941
4943
4948
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this a load/store that we have a RegRegScale instruction for.
static std::pair< SDValue, SDValue > extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair)
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT, SDValue Lo, SDValue Hi)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
#define INST_ALL_NF_CASE(NAME)
static bool isApplicableToPLI(int Val)
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
bool isPosZero() const
Definition APFloat.h:1527
bool isNegZero() const
Definition APFloat.h:1528
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
LLVM_ABI bool isSplat(unsigned SplatSizeInBits) const
Check if the APInt consists of a repeated bit pattern.
Definition APInt.cpp:630
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:475
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
bool selectVMNOT_VLOp(SDNode *Parent, SDValue N, SDValue &Res)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool tryWideningMulAcc(SDNode *Node, const SDLoc &DL)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
bool selectVMNOTOp(SDValue N, SDValue &Res)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
iterator_range< user_iterator > users()
Definition Value.h:427
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.