1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
42void RISCVDAGToDAGISel::PreprocessISelDAG() {
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 if (Subtarget->enablePExtCodeGen())
55 break;
56 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
57 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
58 MVT VT = N->getSimpleValueType(0);
59 unsigned Opc =
60 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
61 SDLoc DL(N);
62 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
63 SDValue Src = N->getOperand(0);
64 if (VT.isInteger())
65 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
66 N->getOperand(0));
67 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
68 break;
69 }
70 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
71 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
72 // load. Done after lowering and combining so that we have a chance to
73 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
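// For example, splatting the i64 value {Hi, Lo} on RV32: Lo is stored to an
// 8-byte stack slot, Hi is stored 4 bytes above it, and a vlse64 whose stride
// register is x0 (stride 0) reloads that one 64-bit slot into every element.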
74 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
75 MVT VT = N->getSimpleValueType(0);
76 SDValue Passthru = N->getOperand(0);
77 SDValue Lo = N->getOperand(1);
78 SDValue Hi = N->getOperand(2);
79 SDValue VL = N->getOperand(3);
80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
82 "Unexpected VTs!");
83 MachineFunction &MF = CurDAG->getMachineFunction();
84 SDLoc DL(N);
85
86 // Create temporary stack for each expanding node.
87 SDValue StackSlot =
88 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
89 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
90 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
91
92 SDValue Chain = CurDAG->getEntryNode();
93 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
94
95 SDValue OffsetSlot =
96 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
97 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
98 Align(8));
99
100 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
101
102 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
103 SDValue IntID =
104 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
105 SDValue Ops[] = {Chain,
106 IntID,
107 Passthru,
108 StackSlot,
109 CurDAG->getRegister(RISCV::X0, MVT::i64),
110 VL};
111
112 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
113 MVT::i64, MPI, Align(8),
114 MachineMemOperand::MOLoad);
115 break;
116 }
117 case ISD::FP_EXTEND: {
118 // We only have vector patterns for riscv_fpextend_vl in isel.
119 SDLoc DL(N);
120 MVT VT = N->getSimpleValueType(0);
121 if (!VT.isVector())
122 break;
123 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
124 SDValue TrueMask = CurDAG->getNode(
125 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
126 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
127 TrueMask, VLMAX);
128 break;
129 }
130 }
131
132 if (Result) {
133 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
134 LLVM_DEBUG(N->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\nNew: ");
136 LLVM_DEBUG(Result->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\n");
138
139 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
140 MadeChange = true;
141 }
142 }
143
144 if (MadeChange)
145 CurDAG->RemoveDeadNodes();
146}
147
148void RISCVDAGToDAGISel::PostprocessISelDAG() {
149 HandleSDNode Dummy(CurDAG->getRoot());
150 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
151
152 bool MadeChange = false;
153 while (Position != CurDAG->allnodes_begin()) {
154 SDNode *N = &*--Position;
155 // Skip dead nodes and any non-machine opcodes.
156 if (N->use_empty() || !N->isMachineOpcode())
157 continue;
158
159 MadeChange |= doPeepholeSExtW(N);
160
161 // FIXME: This is here only because the VMerge transform doesn't
162 // know how to handle masked true inputs. Once that has been moved
163 // to post-ISEL, this can be deleted as well.
164 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 // After we're done with everything else, convert IMPLICIT_DEF
170 // passthru operands to NoRegister. This is required to work around
171 // an optimization deficiency in MachineCSE. This really should
172 // be merged back into each of the patterns (i.e. there's no good
173 // reason not to go directly to NoReg), but is being done this way
174 // to allow easy backporting.
175 MadeChange |= doPeepholeNoRegPassThru();
176
177 if (MadeChange)
178 CurDAG->RemoveDeadNodes();
179}
180
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
182 RISCVMatInt::InstSeq &Seq) {
183 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
187 switch (Inst.getOpndKind()) {
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
190 break;
191 case RISCVMatInt::RegX0:
192 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
193 CurDAG->getRegister(RISCV::X0, VT));
194 break;
195 case RISCVMatInt::RegReg:
196 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
197 break;
198 case RISCVMatInt::RegImm:
199 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211 int64_t Imm, const RISCVSubtarget &Subtarget) {
212 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
213
214 // Use a rematerializable pseudo instruction for short sequences if enabled.
215 if (Seq.size() == 2 && UsePseudoMovImm)
216 return SDValue(
217 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
218 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
219 0);
220
221 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222 // worst an LUI+ADDIW. This will require an extra register, but avoids a
223 // constant pool.
224 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225 // low and high 32 bits are the same and bit 31 and 63 are set.
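// For example, Imm = 0x1234567812345678 typically needs a six-instruction
// LUI/ADDIW/SLLI/ADDI chain, but since the low 32 bits repeat it can instead
// be built as:
//   lui a0, 0x12345; addiw a0, a0, 0x678; slli a1, a0, 32; add a0, a0, a1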
226 if (Seq.size() > 3) {
227 unsigned ShiftAmt, AddOpc;
228 RISCVMatInt::InstSeq SeqLo =
229 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
230 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
232
233 SDValue SLLI = SDValue(
234 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
235 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
236 0);
237 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
238 }
239 }
240
241 // Otherwise, use the original sequence.
242 return selectImmSeq(CurDAG, DL, VT, Seq);
243}
244
245void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248 bool IsLoad, MVT *IndexVT) {
249 SDValue Chain = Node->getOperand(0);
250
251 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
252
253 if (IsStridedOrIndexed) {
254 Operands.push_back(Node->getOperand(CurOp++)); // Index.
255 if (IndexVT)
256 *IndexVT = Operands.back()->getSimpleValueType(0);
257 }
258
259 if (IsMasked) {
260 SDValue Mask = Node->getOperand(CurOp++);
261 Operands.push_back(Mask);
262 }
263 SDValue VL;
264 selectVLOp(Node->getOperand(CurOp++), VL);
265 Operands.push_back(VL);
266
267 MVT XLenVT = Subtarget->getXLenVT();
268 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
269 Operands.push_back(SEWOp);
270
271 // At the IR layer, all the masked load intrinsics have policy operands,
272 // none of the others do. All have passthru operands. For our pseudos,
273 // all loads have policy operands.
274 if (IsLoad) {
275 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
276 if (IsMasked)
277 Policy = Node->getConstantOperandVal(CurOp++);
278 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
279 Operands.push_back(PolicyOp);
280 }
281
282 Operands.push_back(Chain); // Chain.
283}
284
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
290 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
291
292 unsigned CurOp = 2;
293 SmallVector<SDValue, 8> Operands;
294
295 Operands.push_back(Node->getOperand(CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302 static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
305
306 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
307
308 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
309 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
310 CurDAG->RemoveDeadNode(Node);
311}
312
313void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
319 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
320
321 unsigned CurOp = 2;
322 SmallVector<SDValue, 8> Operands;
323
324 Operands.push_back(Node->getOperand(CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
334 XLenVT, MVT::Other, Operands);
335
336 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
337
338 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
339 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
340 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
341 CurDAG->RemoveDeadNode(Node);
342}
343
344void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
345 bool IsOrdered) {
346 SDLoc DL(Node);
347 MVT VT = Node->getSimpleValueType(0);
348 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
349 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
350
351 unsigned CurOp = 2;
352 SmallVector<SDValue, 8> Operands;
353
354 Operands.push_back(Node->getOperand(CurOp++));
355
356 MVT IndexVT;
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
358 /*IsStridedOrIndexed*/ true, Operands,
359 /*IsLoad=*/true, &IndexVT);
360
361#ifndef NDEBUG
362 // Number of elements = RVVBitsPerBlock * LMUL / SEW
363 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
364 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
365 if (DecodedLMUL.second)
366 ContainedTyNumElts /= DecodedLMUL.first;
367 else
368 ContainedTyNumElts *= DecodedLMUL.first;
369 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
370 "Element count mismatch");
371#endif
372
373 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
374 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
375 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
376 reportFatalUsageError("The V extension does not support EEW=64 for index "
377 "values when XLEN=32");
378 }
379 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
380 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
381 static_cast<unsigned>(IndexLMUL));
382 MachineSDNode *Load =
383 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
384
385 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
386
387 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
388 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
389 CurDAG->RemoveDeadNode(Node);
390}
391
392void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
393 bool IsStrided) {
394 SDLoc DL(Node);
395 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
396 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
397 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
398
399 unsigned CurOp = 2;
400 SmallVector<SDValue, 8> Operands;
401
402 Operands.push_back(Node->getOperand(CurOp++));
403
404 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
405 Operands);
406
407 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
408 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
409 MachineSDNode *Store =
410 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
411
412 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
413
414 ReplaceNode(Node, Store);
415}
416
417void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
418 bool IsOrdered) {
419 SDLoc DL(Node);
420 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
421 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
422 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
423
424 unsigned CurOp = 2;
425 SmallVector<SDValue, 8> Operands;
426
427 Operands.push_back(Node->getOperand(CurOp++));
428
429 MVT IndexVT;
430 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
431 /*IsStridedOrIndexed*/ true, Operands,
432 /*IsLoad=*/false, &IndexVT);
433
434#ifndef NDEBUG
435 // Number of elements = RVVBitsPerBlock * LMUL / SEW
436 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
437 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
438 if (DecodedLMUL.second)
439 ContainedTyNumElts /= DecodedLMUL.first;
440 else
441 ContainedTyNumElts *= DecodedLMUL.first;
442 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
443 "Element count mismatch");
444#endif
445
446 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 reportFatalUsageError("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Store =
456 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
457
458 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
459
460 ReplaceNode(Node, Store);
461}
462
463void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
464 if (!Subtarget->hasVInstructions())
465 return;
466
467 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
468
469 SDLoc DL(Node);
470 MVT XLenVT = Subtarget->getXLenVT();
471
472 unsigned IntNo = Node->getConstantOperandVal(0);
473
474 assert((IntNo == Intrinsic::riscv_vsetvli ||
475 IntNo == Intrinsic::riscv_vsetvlimax) &&
476 "Unexpected vsetvli intrinsic");
477
478 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
479 unsigned Offset = (VLMax ? 1 : 2);
480
481 assert(Node->getNumOperands() == Offset + 2 &&
482 "Unexpected number of operands");
483
484 unsigned SEW =
485 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
486 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
487 Node->getConstantOperandVal(Offset + 1) & 0x7);
488
489 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
490 /*MaskAgnostic*/ true);
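// For example, SEW=32 with LMUL=1 encodes as vsew=0b010 and vlmul=0b000, so
// with both tail and mask agnostic the resulting vtype immediate is
// 0b1_1_010_000 (0xd0).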
491 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
492
493 SDValue VLOperand;
494 unsigned Opcode = RISCV::PseudoVSETVLI;
495 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
496 if (auto VLEN = Subtarget->getRealVLen())
497 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
498 VLMax = true;
499 }
500 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
501 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
502 Opcode = RISCV::PseudoVSETVLIX0;
503 } else {
504 VLOperand = Node->getOperand(1);
505
506 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
507 uint64_t AVL = C->getZExtValue();
508 if (isUInt<5>(AVL)) {
509 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
510 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
511 XLenVT, VLImm, VTypeIOp));
512 return;
513 }
514 }
515 }
516
517 ReplaceNode(Node,
518 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
519}
520
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
538 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
547
548 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
549 Node->getOperand(1), VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
553 ReplaceNode(Node,
554 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
555 Node->getOperand(1), Log2SEW, TWiden));
556 }
557}
558
559bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
560 MVT VT = Node->getSimpleValueType(0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
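// For example, (or (shl X, 8), 0x1100) needs the constant 0x1100 in a
// register, but the equivalent (shl (or X, 0x11), 8) folds the constant
// into an ORI.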
568 SDValue N0 = Node->getOperand(0);
569 SDValue N1 = Node->getOperand(1);
570
571 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
596 auto *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 BinOpc, DL, VT, Shift.getOperand(0),
629 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
632 CurDAG->getTargetConstant(ShAmt, DL, VT));
633 ReplaceNode(Node, SLLI);
634 return true;
635}
636
637bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
638 unsigned Opc;
639
640 if (Subtarget->hasVendorXTHeadBb())
641 Opc = RISCV::TH_EXT;
642 else if (Subtarget->hasVendorXAndesPerf())
643 Opc = RISCV::NDS_BFOS;
644 else if (Subtarget->hasVendorXqcibm())
645 Opc = RISCV::QC_EXT;
646 else
647 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
648 return false;
649
650 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
651 if (!N1C)
652 return false;
653
654 SDValue N0 = Node->getOperand(0);
655 if (!N0.hasOneUse())
656 return false;
657
658 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
659 const SDLoc &DL, MVT VT) {
660 if (Opc == RISCV::QC_EXT) {
661 // QC.EXT X, width, shamt
662 // shamt is the same as Lsb
663 // width is the number of bits to extract from the Lsb
664 Msb = Msb - Lsb + 1;
665 }
666 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
667 CurDAG->getTargetConstant(Msb, DL, VT),
668 CurDAG->getTargetConstant(Lsb, DL, VT));
669 };
670
671 SDLoc DL(Node);
672 MVT VT = Node->getSimpleValueType(0);
673 const unsigned RightShAmt = N1C->getZExtValue();
674
675 // Transform (sra (shl X, C1) C2) with C1 < C2
676 // -> (SignedBitfieldExtract X, msb, lsb)
677 if (N0.getOpcode() == ISD::SHL) {
678 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
679 if (!N01C)
680 return false;
681
682 const unsigned LeftShAmt = N01C->getZExtValue();
683 // Make sure that this is a bitfield extraction (i.e., the shift-right
685 // amount cannot be less than the left-shift).
685 if (LeftShAmt > RightShAmt)
686 return false;
687
688 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
689 const unsigned Msb = MsbPlusOne - 1;
690 const unsigned Lsb = RightShAmt - LeftShAmt;
691
692 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
693 ReplaceNode(Node, Sbe);
694 return true;
695 }
696
697 // Transform (sra (sext_inreg X, _), C) ->
698 // (SignedBitfieldExtract X, msb, lsb)
699 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
700 unsigned ExtSize =
701 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
702
703 // ExtSize of 32 should use sraiw via tablegen pattern.
704 if (ExtSize == 32)
705 return false;
706
707 const unsigned Msb = ExtSize - 1;
708 // If the shift-right amount is greater than Msb, the shift extracts the
709 // X[Msb] bit and sign-extends it.
710 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
711
712 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
713 ReplaceNode(Node, Sbe);
714 return true;
715 }
716
717 return false;
718}
719
720bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
740 CurDAG->getTargetConstant(Lsb, DL, VT),
741 CurDAG->getTargetConstant(Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(Node, Sbi);
767 return true;
768 }
769
770 return false;
771}
772
773bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
795 CurDAG->getTargetConstant(Msb, DL, VT),
796 CurDAG->getTargetConstant(Lsb, DL, VT));
797 ReplaceNode(Node, Ube);
798 return true;
799}
800
801bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
815 CurDAG->getTargetConstant(Lsb, DL, VT),
816 CurDAG->getTargetConstant(Msb, DL, VT));
817 ReplaceNode(Node, Ubi);
818 return true;
819}
820
821bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
822 // Target does not support indexed loads.
823 if (!Subtarget->hasVendorXTHeadMemIdx())
824 return false;
825
826 LoadSDNode *Ld = cast<LoadSDNode>(Node);
827 ISD::MemIndexedMode AM = Ld->getAddressingMode();
828 if (AM == ISD::UNINDEXED)
829 return false;
830
831 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
832 if (!C)
833 return false;
834
835 EVT LoadVT = Ld->getMemoryVT();
836 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
837 "Unexpected addressing mode");
838 bool IsPre = AM == ISD::PRE_INC;
839 bool IsPost = AM == ISD::POST_INC;
840 int64_t Offset = C->getSExtValue();
841
842 // The constants that can be encoded in the THeadMemIdx instructions
843 // are of the form (sign_extend(imm5) << imm2).
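// For example, an offset of 120 is encodable as 15 << 3, while an offset of
// 17 is not: it is odd, so no shift larger than 0 divides it, and 17 does
// not fit in a signed 5-bit immediate.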
844 unsigned Shift;
845 for (Shift = 0; Shift < 4; Shift++)
846 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
847 break;
848
849 // Constant cannot be encoded.
850 if (Shift == 4)
851 return false;
852
853 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
854 unsigned Opcode;
855 if (LoadVT == MVT::i8 && IsPre)
856 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
857 else if (LoadVT == MVT::i8 && IsPost)
858 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
859 else if (LoadVT == MVT::i16 && IsPre)
860 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
861 else if (LoadVT == MVT::i16 && IsPost)
862 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
863 else if (LoadVT == MVT::i32 && IsPre)
864 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
865 else if (LoadVT == MVT::i32 && IsPost)
866 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
867 else if (LoadVT == MVT::i64 && IsPre)
868 Opcode = RISCV::TH_LDIB;
869 else if (LoadVT == MVT::i64 && IsPost)
870 Opcode = RISCV::TH_LDIA;
871 else
872 return false;
873
874 EVT Ty = Ld->getOffset().getValueType();
875 SDValue Ops[] = {
876 Ld->getBasePtr(),
877 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
878 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
879 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
880 Ld->getValueType(1), MVT::Other, Ops);
881
882 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
883 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
884
885 ReplaceNode(Node, New);
886
887 return true;
888}
889
890static Register getTileReg(uint64_t TileNum) {
891 assert(TileNum <= 15 && "Invalid tile number");
892 return RISCV::T0 + TileNum;
893}
894
895void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
896 if (!Subtarget->hasVInstructions())
897 return;
898
899 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
900
901 SDLoc DL(Node);
902 unsigned IntNo = Node->getConstantOperandVal(1);
903
904 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
905 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
906 "Unexpected vsetvli intrinsic");
907
908 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
909 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
910 SDValue SEWOp =
911 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
912 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
913 Node->getOperand(4), Node->getOperand(5),
914 Node->getOperand(8), SEWOp,
915 Node->getOperand(0)};
916
917 unsigned Opcode;
918 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
919 switch (LMulSDNode->getSExtValue()) {
920 case 5:
921 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
922 : RISCV::PseudoSF_VC_I_SE_MF8;
923 break;
924 case 6:
925 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
926 : RISCV::PseudoSF_VC_I_SE_MF4;
927 break;
928 case 7:
929 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
930 : RISCV::PseudoSF_VC_I_SE_MF2;
931 break;
932 case 0:
933 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
934 : RISCV::PseudoSF_VC_I_SE_M1;
935 break;
936 case 1:
937 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
938 : RISCV::PseudoSF_VC_I_SE_M2;
939 break;
940 case 2:
941 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
942 : RISCV::PseudoSF_VC_I_SE_M4;
943 break;
944 case 3:
945 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
946 : RISCV::PseudoSF_VC_I_SE_M8;
947 break;
948 }
949
950 ReplaceNode(Node, CurDAG->getMachineNode(
951 Opcode, DL, Node->getSimpleValueType(0), Operands));
952}
953
954static unsigned getSegInstNF(unsigned Intrinsic) {
955#define INST_NF_CASE(NAME, NF) \
956 case Intrinsic::riscv_##NAME##NF: \
957 return NF;
958#define INST_NF_CASE_MASK(NAME, NF) \
959 case Intrinsic::riscv_##NAME##NF##_mask: \
960 return NF;
961#define INST_NF_CASE_FF(NAME, NF) \
962 case Intrinsic::riscv_##NAME##NF##ff: \
963 return NF;
964#define INST_NF_CASE_FF_MASK(NAME, NF) \
965 case Intrinsic::riscv_##NAME##NF##ff_mask: \
966 return NF;
967#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
968 MACRO_NAME(NAME, 2) \
969 MACRO_NAME(NAME, 3) \
970 MACRO_NAME(NAME, 4) \
971 MACRO_NAME(NAME, 5) \
972 MACRO_NAME(NAME, 6) \
973 MACRO_NAME(NAME, 7) \
974 MACRO_NAME(NAME, 8)
975#define INST_ALL_NF_CASE(NAME) \
976 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
977 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
978#define INST_ALL_NF_CASE_WITH_FF(NAME) \
979 INST_ALL_NF_CASE(NAME) \
980 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
981 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
982 switch (Intrinsic) {
983 default:
984 llvm_unreachable("Unexpected segment load/store intrinsic");
985 INST_ALL_NF_CASE_WITH_FF(vlseg)
986 INST_ALL_NF_CASE(vlsseg)
987 INST_ALL_NF_CASE(vloxseg)
988 INST_ALL_NF_CASE(vluxseg)
989 INST_ALL_NF_CASE(vsseg)
990 INST_ALL_NF_CASE(vssseg)
991 INST_ALL_NF_CASE(vsoxseg)
992 INST_ALL_NF_CASE(vsuxseg)
993 }
994}
995
996static bool isApplicableToPLI(int Val) {
997 // Check if the immediate is packed i8 or i10
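// For example, 0x05050505 repeats a single byte and can become PLI_B, and
// 0x01ff01ff repeats the halfword 511 (a simm10) and can become PLI_H, but
// 0x12345678 has differing halfwords and is rejected.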
998 int16_t Bit31To16 = Val >> 16;
999 int16_t Bit15To0 = Val;
1000 int8_t Bit15To8 = Bit15To0 >> 8;
1001 int8_t Bit7To0 = Val;
1002 if (Bit31To16 != Bit15To0)
1003 return false;
1004
1005 return isInt<10>(Bit31To16) || Bit15To8 == Bit7To0;
1006}
1007
1008void RISCVDAGToDAGISel::Select(SDNode *Node) {
1009 // If we have a custom node, we have already selected.
1010 if (Node->isMachineOpcode()) {
1011 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1012 Node->setNodeId(-1);
1013 return;
1014 }
1015
1016 // Instruction Selection not handled by the auto-generated tablegen selection
1017 // should be handled here.
1018 unsigned Opcode = Node->getOpcode();
1019 MVT XLenVT = Subtarget->getXLenVT();
1020 SDLoc DL(Node);
1021 MVT VT = Node->getSimpleValueType(0);
1022
1023 bool HasBitTest = Subtarget->hasBEXTILike();
1024
1025 switch (Opcode) {
1026 case ISD::Constant: {
1027 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
1028 auto *ConstNode = cast<ConstantSDNode>(Node);
1029 if (ConstNode->isZero()) {
1030 SDValue New =
1031 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1032 ReplaceNode(Node, New.getNode());
1033 return;
1034 }
1035 int64_t Imm = ConstNode->getSExtValue();
1036 // If only the lower 8 bits are used, try to convert this to a simm6 by
1037 // sign-extending bit 7. This is neutral without the C extension, and
1038 // allows C.LI to be used if C is present.
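// For example, Imm = 0xf8 becomes -8 after sign-extending bit 7; if every
// user only reads the low 8 bits the value is equivalent, and -8 fits the
// simm6 range of C.LI.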
1039 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
1040 Imm = SignExtend64<8>(Imm);
1041 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1042 // by sign extending bit 15.
1043 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
1044 hasAllHUsers(Node))
1045 Imm = SignExtend64<16>(Imm);
1046 // If the upper 32-bits are not used try to convert this into a simm32 by
1047 // sign extending bit 32.
1048 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1049 Imm = SignExtend64<32>(Imm);
1050
1051 if (Subtarget->enablePExtCodeGen() && isApplicableToPLI(Imm) &&
1052 hasAllWUsers(Node)) {
1053 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1054 // can simply copy the lower 32 bits into the upper 32 bits so the
1055 // constant can be rematerialized as PLI_B or PLI_H.
1056 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1057 }
1058
1059 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1060 return;
1061 }
1062 case ISD::ConstantFP: {
1063 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1064
1065 bool Is64Bit = Subtarget->is64Bit();
1066 bool HasZdinx = Subtarget->hasStdExtZdinx();
1067
1068 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1069 SDValue Imm;
1070 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1071 // create an integer immediate.
1072 if (APF.isPosZero() || NegZeroF64) {
1073 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1074 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1075 else
1076 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1077 } else {
1078 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1079 *Subtarget);
1080 }
1081
1082 unsigned Opc;
1083 switch (VT.SimpleTy) {
1084 default:
1085 llvm_unreachable("Unexpected size");
1086 case MVT::bf16:
1087 assert(Subtarget->hasStdExtZfbfmin());
1088 Opc = RISCV::FMV_H_X;
1089 break;
1090 case MVT::f16:
1091 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1092 break;
1093 case MVT::f32:
1094 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1095 break;
1096 case MVT::f64:
1097 // For RV32, we can't move from a GPR, we need to convert instead. This
1098 // should only happen for +0.0 and -0.0.
1099 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1100 if (HasZdinx)
1101 Opc = RISCV::COPY;
1102 else
1103 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1104 break;
1105 }
1106
1107 SDNode *Res;
1108 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1109 Res =
1110 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1111 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1112 Res =
1113 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1114 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1115 Res = CurDAG->getMachineNode(
1116 Opc, DL, VT, Imm,
1117 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1118 else
1119 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1120
1121 // For f64 -0.0, we need to insert a fneg.d idiom.
1122 if (NegZeroF64) {
1123 Opc = RISCV::FSGNJN_D;
1124 if (HasZdinx)
1125 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1126 Res =
1127 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1128 }
1129
1130 ReplaceNode(Node, Res);
1131 return;
1132 }
1133 case RISCVISD::BuildGPRPair:
1134 case RISCVISD::BuildPairF64: {
1135 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1136 break;
1137
1138 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1139 "BuildPairF64 only handled here on rv32i_zdinx");
1140
1141 SDValue Ops[] = {
1142 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1143 Node->getOperand(0),
1144 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1145 Node->getOperand(1),
1146 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1147
1148 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
1149 ReplaceNode(Node, N);
1150 return;
1151 }
1152 case RISCVISD::SplitGPRPair:
1153 case RISCVISD::SplitF64: {
1154 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1155 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1156 "SplitF64 only handled here on rv32i_zdinx");
1157
1158 if (!SDValue(Node, 0).use_empty()) {
1159 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1160 Node->getValueType(0),
1161 Node->getOperand(0));
1162 ReplaceUses(SDValue(Node, 0), Lo);
1163 }
1164
1165 if (!SDValue(Node, 1).use_empty()) {
1166 SDValue Hi = CurDAG->getTargetExtractSubreg(
1167 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1168 ReplaceUses(SDValue(Node, 1), Hi);
1169 }
1170
1171 CurDAG->RemoveDeadNode(Node);
1172 return;
1173 }
1174
1175 assert(Opcode != RISCVISD::SplitGPRPair &&
1176 "SplitGPRPair should already be handled");
1177
1178 if (!Subtarget->hasStdExtZfa())
1179 break;
1180 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1181 "Unexpected subtarget");
1182
1183 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1184 if (!SDValue(Node, 0).use_empty()) {
1185 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1186 Node->getOperand(0));
1187 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1188 }
1189 if (!SDValue(Node, 1).use_empty()) {
1190 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1191 Node->getOperand(0));
1192 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1193 }
1194
1195 CurDAG->RemoveDeadNode(Node);
1196 return;
1197 }
1198 case ISD::SHL: {
1199 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1200 if (!N1C)
1201 break;
1202 SDValue N0 = Node->getOperand(0);
1203 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1204 !isa<ConstantSDNode>(N0.getOperand(1)))
1205 break;
1206 unsigned ShAmt = N1C->getZExtValue();
1207 uint64_t Mask = N0.getConstantOperandVal(1);
1208
1209 if (isShiftedMask_64(Mask)) {
1210 unsigned XLen = Subtarget->getXLen();
1211 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1212 unsigned TrailingZeros = llvm::countr_zero(Mask);
1213 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1214 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1215 // where C2 has 32 leading zeros and C3 trailing zeros.
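// For example, on RV64 with C2 = 0xffffff00 and C = 4 this becomes
// (slli (srliw X, 8), 12), avoiding a separate constant for the mask.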
1216 SDNode *SRLIW = CurDAG->getMachineNode(
1217 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1218 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1219 SDNode *SLLI = CurDAG->getMachineNode(
1220 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1221 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1222 ReplaceNode(Node, SLLI);
1223 return;
1224 }
1225 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1226 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1227 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1228 // where C2 has C4 leading zeros and no trailing zeros.
1229 // This is profitable if the "and" was to be lowered to
1230 // (srli (slli X, C4), C4) and not (andi X, C2).
1231 // For "LeadingZeros == 32":
1232 // - with Zba it's just (slli.uw X, C)
1233 // - without Zba a tablegen pattern applies the very same
1234 // transform as we would have done here
1235 SDNode *SLLI = CurDAG->getMachineNode(
1236 RISCV::SLLI, DL, VT, N0.getOperand(0),
1237 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1238 SDNode *SRLI = CurDAG->getMachineNode(
1239 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1240 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1241 ReplaceNode(Node, SRLI);
1242 return;
1243 }
1244 }
1245 break;
1246 }
1247 case ISD::SRL: {
1248 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1249 if (!N1C)
1250 break;
1251 SDValue N0 = Node->getOperand(0);
1252 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1253 break;
1254 unsigned ShAmt = N1C->getZExtValue();
1255 uint64_t Mask = N0.getConstantOperandVal(1);
1256
1257 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1258 // 32 leading zeros and C3 trailing zeros.
1259 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1260 unsigned XLen = Subtarget->getXLen();
1261 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1262 unsigned TrailingZeros = llvm::countr_zero(Mask);
1263 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1264 SDNode *SRLIW = CurDAG->getMachineNode(
1265 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1266 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1267 SDNode *SLLI = CurDAG->getMachineNode(
1268 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1269 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1270 ReplaceNode(Node, SLLI);
1271 return;
1272 }
1273 }
1274
1275 // Optimize (srl (and X, C2), C) ->
1276 // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1277 // Where C2 is a mask with C3 trailing ones.
1278 // Taking into account that the C2 may have had lower bits unset by
1279 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1280 // This pattern occurs when type legalizing right shifts for types with
1281 // less than XLen bits.
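// For example, with a 16-bit mask and C = 4 on RV64, (srl (and X, 0xffff), 4)
// becomes (srli (slli X, 48), 52) when no single bitfield-extract
// instruction applies.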
1282 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1283 if (!isMask_64(Mask))
1284 break;
1285 unsigned TrailingOnes = llvm::countr_one(Mask);
1286 if (ShAmt >= TrailingOnes)
1287 break;
1288 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1289 if (TrailingOnes == 32) {
1290 SDNode *SRLI = CurDAG->getMachineNode(
1291 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1292 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1293 ReplaceNode(Node, SRLI);
1294 return;
1295 }
1296
1297 // Only do the remaining transforms if the AND has one use.
1298 if (!N0.hasOneUse())
1299 break;
1300
1301 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1302 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1303 SDNode *BEXTI = CurDAG->getMachineNode(
1304 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1305 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1306 ReplaceNode(Node, BEXTI);
1307 return;
1308 }
1309
1310 const unsigned Msb = TrailingOnes - 1;
1311 const unsigned Lsb = ShAmt;
1312 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1313 return;
1314
1315 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1316 SDNode *SLLI =
1317 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1318 CurDAG->getTargetConstant(LShAmt, DL, VT));
1319 SDNode *SRLI = CurDAG->getMachineNode(
1320 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1321 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1322 ReplaceNode(Node, SRLI);
1323 return;
1324 }
1325 case ISD::SRA: {
1326 if (trySignedBitfieldExtract(Node))
1327 return;
1328
1329 if (trySignedBitfieldInsertInSign(Node))
1330 return;
1331
1332 // Optimize (sra (sext_inreg X, i16), C) ->
1333 // (srai (slli X, (XLen-16)), (XLen-16) + C)
1334 // And (sra (sext_inreg X, i8), C) ->
1335 // (srai (slli X, (XLen-8)), (XLen-8) + C)
1336 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1337 // This transform matches the code we get without Zbb. The shifts are more
1338 // compressible, and this can help expose CSE opportunities in the sdiv by
1339 // constant optimization.
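// For example, on RV64 (sra (sext_inreg X, i16), 3) becomes
// (srai (slli X, 48), 51).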
1340 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1341 if (!N1C)
1342 break;
1343 SDValue N0 = Node->getOperand(0);
1344 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1345 break;
1346 unsigned ShAmt = N1C->getZExtValue();
1347 unsigned ExtSize =
1348 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1349 // ExtSize of 32 should use sraiw via tablegen pattern.
1350 if (ExtSize >= 32 || ShAmt >= ExtSize)
1351 break;
1352 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1353 SDNode *SLLI =
1354 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1355 CurDAG->getTargetConstant(LShAmt, DL, VT));
1356 SDNode *SRAI = CurDAG->getMachineNode(
1357 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1358 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1359 ReplaceNode(Node, SRAI);
1360 return;
1361 }
1362 case ISD::OR: {
1363 if (tryShrinkShlLogicImm(Node))
1364 return;
1365
1366 break;
1367 }
1368 case ISD::XOR:
1369 if (tryShrinkShlLogicImm(Node))
1370 return;
1371
1372 break;
1373 case ISD::AND: {
1374 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1375 if (!N1C)
1376 break;
1377
1378 SDValue N0 = Node->getOperand(0);
1379
1380 bool LeftShift = N0.getOpcode() == ISD::SHL;
1381 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1382 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1383 if (!C)
1384 break;
1385 unsigned C2 = C->getZExtValue();
1386 unsigned XLen = Subtarget->getXLen();
1387 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1388
1389 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1390 // shift pair might offer more compression opportunities.
1391 // TODO: We could check for C extension here, but we don't have many lit
1392 // tests with the C extension enabled so not checking gets better
1393 // coverage.
1394 // TODO: What if ANDI faster than shift?
1395 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1396
1397 uint64_t C1 = N1C->getZExtValue();
1398
1399 // Clear irrelevant bits in the mask.
1400 if (LeftShift)
1401 C1 &= maskTrailingZeros<uint64_t>(C2);
1402 else
1403 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1404
1405 // Some transforms should only be done if the shift has a single use or
1406 // the AND would become (srli (slli X, 32), 32)
1407 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1408
1409 SDValue X = N0.getOperand(0);
1410
1411 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1412 // with c3 leading zeros.
1413 if (!LeftShift && isMask_64(C1)) {
1414 unsigned Leading = XLen - llvm::bit_width(C1);
1415 if (C2 < Leading) {
1416 // If the number of leading zeros is C2+32 this can be SRLIW.
1417 if (C2 + 32 == Leading) {
1418 SDNode *SRLIW = CurDAG->getMachineNode(
1419 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1420 ReplaceNode(Node, SRLIW);
1421 return;
1422 }
1423
1424 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1425 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1426 //
1427 // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
1428 // legalized and goes through DAG combine.
1429 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1430 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1431 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1432 SDNode *SRAIW =
1433 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1434 CurDAG->getTargetConstant(31, DL, VT));
1435 SDNode *SRLIW = CurDAG->getMachineNode(
1436 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1437 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1438 ReplaceNode(Node, SRLIW);
1439 return;
1440 }
1441
1442 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1443 // available.
1444 // Transform (and (srl x, C2), C1)
1445 // -> (<bfextract> x, msb, lsb)
1446 //
1447 // Make sure to keep this below the SRLIW cases, as we always want to
1448 // prefer the more common instruction.
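// For example, (and (srl x, 4), 0xff) extracts x[11:4], so Msb = 8 + 4 - 1 =
// 11 and Lsb = 4.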
1449 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1450 const unsigned Lsb = C2;
1451 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1452 return;
1453
1454 // (srli (slli x, c3-c2), c3).
1455 // Skip if we could use (zext.w (sraiw X, C2)).
1456 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1457 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1458 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1459 // Also Skip if we can use bexti or th.tst.
1460 Skip |= HasBitTest && Leading == XLen - 1;
1461 if (OneUseOrZExtW && !Skip) {
1462 SDNode *SLLI = CurDAG->getMachineNode(
1463 RISCV::SLLI, DL, VT, X,
1464 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1465 SDNode *SRLI = CurDAG->getMachineNode(
1466 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1467 CurDAG->getTargetConstant(Leading, DL, VT));
1468 ReplaceNode(Node, SRLI);
1469 return;
1470 }
1471 }
1472 }
1473
1474 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1475 // shifted by c2 bits with c3 leading zeros.
1476 if (LeftShift && isShiftedMask_64(C1)) {
1477 unsigned Leading = XLen - llvm::bit_width(C1);
1478
1479 if (C2 + Leading < XLen &&
1480 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1481 // Use slli.uw when possible.
1482 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1483 SDNode *SLLI_UW =
1484 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1485 CurDAG->getTargetConstant(C2, DL, VT));
1486 ReplaceNode(Node, SLLI_UW);
1487 return;
1488 }
1489
1490 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1491 // available.
1492 // Transform (and (shl x, c2), c1)
1493 // -> (<bfinsert> x, msb, lsb)
1494 // e.g.
1495 // (and (shl x, 12), 0x00fff000)
1496 // If XLen = 32 and C2 = 12, then
1497 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1498 const unsigned Msb = XLen - Leading - 1;
1499 const unsigned Lsb = C2;
1500 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1501 return;
1502
1503 if (OneUseOrZExtW && !IsCANDI) {
1504 // (packh x0, X)
1505 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1506 SDNode *PACKH = CurDAG->getMachineNode(
1507 RISCV::PACKH, DL, VT,
1508 CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
1509 ReplaceNode(Node, PACKH);
1510 return;
1511 }
1512 // (srli (slli x, c2+c3), c3)
1513 SDNode *SLLI = CurDAG->getMachineNode(
1514 RISCV::SLLI, DL, VT, X,
1515 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1516 SDNode *SRLI = CurDAG->getMachineNode(
1517 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1518 CurDAG->getTargetConstant(Leading, DL, VT));
1519 ReplaceNode(Node, SRLI);
1520 return;
1521 }
1522 }
1523 }
1524
1525 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1526 // shifted mask with c2 leading zeros and c3 trailing zeros.
1527 if (!LeftShift && isShiftedMask_64(C1)) {
1528 unsigned Leading = XLen - llvm::bit_width(C1);
1529 unsigned Trailing = llvm::countr_zero(C1);
1530 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1531 !IsCANDI) {
1532 unsigned SrliOpc = RISCV::SRLI;
1533 // If the input is zexti32 we should use SRLIW.
1534 if (X.getOpcode() == ISD::AND &&
1535 isa<ConstantSDNode>(X.getOperand(1)) &&
1536 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1537 SrliOpc = RISCV::SRLIW;
1538 X = X.getOperand(0);
1539 }
1540 SDNode *SRLI = CurDAG->getMachineNode(
1541 SrliOpc, DL, VT, X,
1542 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1543 SDNode *SLLI = CurDAG->getMachineNode(
1544 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1545 CurDAG->getTargetConstant(Trailing, DL, VT));
1546 ReplaceNode(Node, SLLI);
1547 return;
1548 }
1549 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1550 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1551 OneUseOrZExtW && !IsCANDI) {
1552 SDNode *SRLIW = CurDAG->getMachineNode(
1553 RISCV::SRLIW, DL, VT, X,
1554 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1555 SDNode *SLLI = CurDAG->getMachineNode(
1556 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1557 CurDAG->getTargetConstant(Trailing, DL, VT));
1558 ReplaceNode(Node, SLLI);
1559 return;
1560 }
1561 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1562 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1563 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1564 SDNode *SRLI = CurDAG->getMachineNode(
1565 RISCV::SRLI, DL, VT, X,
1566 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1567 SDNode *SLLI_UW = CurDAG->getMachineNode(
1568 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1569 CurDAG->getTargetConstant(Trailing, DL, VT));
1570 ReplaceNode(Node, SLLI_UW);
1571 return;
1572 }
1573 }
1574
1575 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1576 // shifted mask with no leading zeros and c3 trailing zeros.
1577 if (LeftShift && isShiftedMask_64(C1)) {
1578 unsigned Leading = XLen - llvm::bit_width(C1);
1579 unsigned Trailing = llvm::countr_zero(C1);
1580 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1581 SDNode *SRLI = CurDAG->getMachineNode(
1582 RISCV::SRLI, DL, VT, X,
1583 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1584 SDNode *SLLI = CurDAG->getMachineNode(
1585 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1586 CurDAG->getTargetConstant(Trailing, DL, VT));
1587 ReplaceNode(Node, SLLI);
1588 return;
1589 }
1590 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1591 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1592 SDNode *SRLIW = CurDAG->getMachineNode(
1593 RISCV::SRLIW, DL, VT, X,
1594 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1595 SDNode *SLLI = CurDAG->getMachineNode(
1596 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1597 CurDAG->getTargetConstant(Trailing, DL, VT));
1598 ReplaceNode(Node, SLLI);
1599 return;
1600 }
1601
1602 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1603 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1604 Subtarget->hasStdExtZba()) {
1605 SDNode *SRLI = CurDAG->getMachineNode(
1606 RISCV::SRLI, DL, VT, X,
1607 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1608 SDNode *SLLI_UW = CurDAG->getMachineNode(
1609 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1610 CurDAG->getTargetConstant(Trailing, DL, VT));
1611 ReplaceNode(Node, SLLI_UW);
1612 return;
1613 }
1614 }
1615 }
1616
1617 const uint64_t C1 = N1C->getZExtValue();
1618
1619 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1620 N0.hasOneUse()) {
1621 unsigned C2 = N0.getConstantOperandVal(1);
1622 unsigned XLen = Subtarget->getXLen();
1623 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1624
1625 SDValue X = N0.getOperand(0);
1626
1627 // Prefer SRAIW + ANDI when possible.
1628 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1629 X.getOpcode() == ISD::SHL &&
1630 isa<ConstantSDNode>(X.getOperand(1)) &&
1631 X.getConstantOperandVal(1) == 32;
1632 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1633 // mask with c3 leading zeros and c2 is larger than c3.
1634 if (isMask_64(C1) && !Skip) {
1635 unsigned Leading = XLen - llvm::bit_width(C1);
1636 if (C2 > Leading) {
1637 SDNode *SRAI = CurDAG->getMachineNode(
1638 RISCV::SRAI, DL, VT, X,
1639 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1640 SDNode *SRLI = CurDAG->getMachineNode(
1641 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1642 CurDAG->getTargetConstant(Leading, DL, VT));
1643 ReplaceNode(Node, SRLI);
1644 return;
1645 }
1646 }
1647
1648 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1649 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1650 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1651 if (isShiftedMask_64(C1) && !Skip) {
1652 unsigned Leading = XLen - llvm::bit_width(C1);
1653 unsigned Trailing = llvm::countr_zero(C1);
1654 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1655 SDNode *SRAI = CurDAG->getMachineNode(
1656 RISCV::SRAI, DL, VT, N0.getOperand(0),
1657 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1658 SDNode *SRLI = CurDAG->getMachineNode(
1659 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1660 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1661 SDNode *SLLI = CurDAG->getMachineNode(
1662 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1663 CurDAG->getTargetConstant(Trailing, DL, VT));
1664 ReplaceNode(Node, SLLI);
1665 return;
1666 }
1667 }
1668 }
1669
1670 // If C1 masks off the upper bits only (but can't be formed as an
1671 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1672 // available.
1673 // Transform (and x, C1)
1674 // -> (<bfextract> x, msb, lsb)
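    // For example, C1=0xfffff (a 20-bit mask that is not a simm12 and not a
    // Zbb/Zba zero-extension pattern) becomes an extract of bits [19:0],
    // i.e. msb=19 and lsb=0.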
1675 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1676 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1677 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1678 const unsigned Msb = llvm::bit_width(C1) - 1;
1679 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1680 return;
1681 }
1682
1683    if (tryShrinkShlLogicImm(Node))
1684      return;
1685
1686 break;
1687 }
1688 case ISD::MUL: {
1689 // Special case for calculating (mul (and X, C2), C1) where the full product
1690 // fits in XLen bits. We can shift X left by the number of leading zeros in
1691 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1692 // product has XLen trailing zeros, putting it in the output of MULHU. This
1693 // can avoid materializing a constant in a register for C2.
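    // For example, with XLen=64, C2=0xff and C1=5: LeadingZeros=56, so we emit
    // (mulhu (slli X, 56), 5<<8); the upper XLen bits of that product are
    // exactly (X & 0xff) * 5.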
1694
1695 // RHS should be a constant.
1696 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1697 if (!N1C || !N1C->hasOneUse())
1698 break;
1699
1700 // LHS should be an AND with constant.
1701 SDValue N0 = Node->getOperand(0);
1702 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1703 break;
1704
1705    uint64_t C2 = N0.getConstantOperandVal(1);
1706
1707 // Constant should be a mask.
1708 if (!isMask_64(C2))
1709 break;
1710
1711 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1712 // multiple users or the constant is a simm12. This prevents inserting a
1713    // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1714 // make it more costly to materialize. Otherwise, using a SLLI might allow
1715 // it to be compressed.
1716 bool IsANDIOrZExt =
1717 isInt<12>(C2) ||
1718 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1719 // With XTHeadBb, we can use TH.EXTU.
1720 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1721 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1722 break;
1723 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1724 // the constant is a simm32.
1725 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1726 // With XTHeadBb, we can use TH.EXTU.
1727 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1728 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1729 break;
1730
1731 // We need to shift left the AND input and C1 by a total of XLen bits.
1732
1733 // How far left do we need to shift the AND input?
1734 unsigned XLen = Subtarget->getXLen();
1735 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1736
1737 // The constant gets shifted by the remaining amount unless that would
1738 // shift bits out.
1739 uint64_t C1 = N1C->getZExtValue();
1740 unsigned ConstantShift = XLen - LeadingZeros;
1741 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1742 break;
1743
1744 uint64_t ShiftedC1 = C1 << ConstantShift;
1745    // If this is RV32, we need to sign extend the constant.
1746 if (XLen == 32)
1747 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1748
1749 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1750 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1751 SDNode *SLLI =
1752 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1753 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1754 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1755 SDValue(SLLI, 0), SDValue(Imm, 0));
1756 ReplaceNode(Node, MULHU);
1757 return;
1758 }
1759 case ISD::LOAD: {
1760 if (tryIndexedLoad(Node))
1761 return;
1762
1763 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1764      // We match the post-incrementing load here.
1765      LoadSDNode *Load = cast<LoadSDNode>(Node);
1766      if (Load->getAddressingMode() != ISD::POST_INC)
1767 break;
1768
1769 SDValue Chain = Node->getOperand(0);
1770 SDValue Base = Node->getOperand(1);
1771 SDValue Offset = Node->getOperand(2);
1772
1773 bool Simm12 = false;
1774 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1775
1776 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1777 int ConstantVal = ConstantOffset->getSExtValue();
1778 Simm12 = isInt<12>(ConstantVal);
1779 if (Simm12)
1780 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1781 Offset.getValueType());
1782 }
1783
1784 unsigned Opcode = 0;
1785 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1786 case MVT::i8:
1787 if (Simm12 && SignExtend)
1788 Opcode = RISCV::CV_LB_ri_inc;
1789 else if (Simm12 && !SignExtend)
1790 Opcode = RISCV::CV_LBU_ri_inc;
1791 else if (!Simm12 && SignExtend)
1792 Opcode = RISCV::CV_LB_rr_inc;
1793 else
1794 Opcode = RISCV::CV_LBU_rr_inc;
1795 break;
1796 case MVT::i16:
1797 if (Simm12 && SignExtend)
1798 Opcode = RISCV::CV_LH_ri_inc;
1799 else if (Simm12 && !SignExtend)
1800 Opcode = RISCV::CV_LHU_ri_inc;
1801 else if (!Simm12 && SignExtend)
1802 Opcode = RISCV::CV_LH_rr_inc;
1803 else
1804 Opcode = RISCV::CV_LHU_rr_inc;
1805 break;
1806 case MVT::i32:
1807 if (Simm12)
1808 Opcode = RISCV::CV_LW_ri_inc;
1809 else
1810 Opcode = RISCV::CV_LW_rr_inc;
1811 break;
1812 default:
1813 break;
1814 }
1815 if (!Opcode)
1816 break;
1817
1818 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1819 Chain.getSimpleValueType(), Base,
1820 Offset, Chain));
1821 return;
1822 }
1823 break;
1824 }
1825 case RISCVISD::LD_RV32: {
1826 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1827
1828    SDValue Base, Offset;
1829    SDValue Chain = Node->getOperand(0);
1830    SDValue Addr = Node->getOperand(1);
1831    SelectAddrRegImm(Addr, Base, Offset);
1832
1833 SDValue Ops[] = {Base, Offset, Chain};
1834 MachineSDNode *New = CurDAG->getMachineNode(
1835 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1836 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1837 MVT::i32, SDValue(New, 0));
1838 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1839 MVT::i32, SDValue(New, 0));
1840 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1841 ReplaceUses(SDValue(Node, 0), Lo);
1842 ReplaceUses(SDValue(Node, 1), Hi);
1843 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1844 CurDAG->RemoveDeadNode(Node);
1845 return;
1846 }
1847  case RISCVISD::SD_RV32: {
1848    SDValue Base, Offset;
1849    SDValue Chain = Node->getOperand(0);
1850    SDValue Addr = Node->getOperand(3);
1851    SelectAddrRegImm(Addr, Base, Offset);
1852
1853 SDValue Lo = Node->getOperand(1);
1854 SDValue Hi = Node->getOperand(2);
1855
1856 SDValue RegPair;
1857    // Peephole to use X0_Pair for storing zero.
1858    if (isNullConstant(Lo) && isNullConstant(Hi)) {
1859      RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1860 } else {
1861 SDValue Ops[] = {
1862 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1863 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1864 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1865
1866 RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1867 MVT::Untyped, Ops),
1868 0);
1869 }
1870
1871 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1872 {RegPair, Base, Offset, Chain});
1873 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1874 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
1875 CurDAG->RemoveDeadNode(Node);
1876 return;
1877 }
1878 case RISCVISD::PPACK_DH: {
1879 assert(Subtarget->enablePExtCodeGen() && Subtarget->isRV32());
1880
1881 SDValue Val0 = Node->getOperand(0);
1882 SDValue Val1 = Node->getOperand(1);
1883 SDValue Val2 = Node->getOperand(2);
1884 SDValue Val3 = Node->getOperand(3);
1885
1886 SDValue Ops[] = {
1887 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Val0,
1888 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Val2,
1889 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1890 SDValue RegPair0 =
1891 SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1892 MVT::Untyped, Ops),
1893 0);
1894 SDValue Ops1[] = {
1895 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Val1,
1896 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Val3,
1897 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1898 SDValue RegPair1 =
1899 SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1900 MVT::Untyped, Ops1),
1901 0);
1902
1903 MachineSDNode *PackDH = CurDAG->getMachineNode(
1904 RISCV::PPAIRE_DB, DL, MVT::Untyped, {RegPair0, RegPair1});
1905
1906 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1907 MVT::i32, SDValue(PackDH, 0));
1908 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1909 MVT::i32, SDValue(PackDH, 0));
1910 ReplaceUses(SDValue(Node, 0), Lo);
1911 ReplaceUses(SDValue(Node, 1), Hi);
1912 CurDAG->RemoveDeadNode(Node);
1913 return;
1914 }
1915  case ISD::INTRINSIC_WO_CHAIN: {
1916    unsigned IntNo = Node->getConstantOperandVal(0);
1917 switch (IntNo) {
1918 // By default we do not custom select any intrinsic.
1919 default:
1920 break;
1921 case Intrinsic::riscv_vmsgeu:
1922 case Intrinsic::riscv_vmsge: {
1923 SDValue Src1 = Node->getOperand(1);
1924 SDValue Src2 = Node->getOperand(2);
1925 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1926 bool IsCmpConstant = false;
1927 bool IsCmpMinimum = false;
1928 // Only custom select scalar second operand.
1929 if (Src2.getValueType() != XLenVT)
1930 break;
1931 // Small constants are handled with patterns.
1932 int64_t CVal = 0;
1933 MVT Src1VT = Src1.getSimpleValueType();
1934 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1935 IsCmpConstant = true;
1936 CVal = C->getSExtValue();
1937 if (CVal >= -15 && CVal <= 16) {
1938 if (!IsUnsigned || CVal != 0)
1939 break;
1940 IsCmpMinimum = true;
1941 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1942 Src1VT.getScalarSizeInBits())
1943 .getSExtValue()) {
1944 IsCmpMinimum = true;
1945 }
1946 }
1947 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1948 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1949 default:
1950 llvm_unreachable("Unexpected LMUL!");
1951#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1952 case RISCVVType::lmulenum: \
1953 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1954 : RISCV::PseudoVMSLT_VX_##suffix; \
1955 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1956 : RISCV::PseudoVMSGT_VX_##suffix; \
1957 break;
1958 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1959 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1960 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1961 CASE_VMSLT_OPCODES(LMUL_1, M1)
1962 CASE_VMSLT_OPCODES(LMUL_2, M2)
1963 CASE_VMSLT_OPCODES(LMUL_4, M4)
1964 CASE_VMSLT_OPCODES(LMUL_8, M8)
1965#undef CASE_VMSLT_OPCODES
1966 }
1967 // Mask operations use the LMUL from the mask type.
1968 switch (RISCVTargetLowering::getLMUL(VT)) {
1969 default:
1970 llvm_unreachable("Unexpected LMUL!");
1971#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1972 case RISCVVType::lmulenum: \
1973 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1974 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1975 break;
1976 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1977 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1978 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1979 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1980 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1981 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1982 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1983#undef CASE_VMNAND_VMSET_OPCODES
1984 }
1985 SDValue SEW = CurDAG->getTargetConstant(
1986 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1987 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1988 SDValue VL;
1989 selectVLOp(Node->getOperand(3), VL);
1990
1991 // If vmsge(u) with minimum value, expand it to vmset.
1992 if (IsCmpMinimum) {
1993        ReplaceNode(Node,
1994                    CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1995 return;
1996 }
1997
1998 if (IsCmpConstant) {
1999 SDValue Imm =
2000 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2001
2002 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2003 {Src1, Imm, VL, SEW}));
2004 return;
2005 }
2006
2007 // Expand to
2008 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2009 SDValue Cmp = SDValue(
2010 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2011 0);
2012 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2013 {Cmp, Cmp, VL, MaskSEW}));
2014 return;
2015 }
2016 case Intrinsic::riscv_vmsgeu_mask:
2017 case Intrinsic::riscv_vmsge_mask: {
2018 SDValue Src1 = Node->getOperand(2);
2019 SDValue Src2 = Node->getOperand(3);
2020 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2021 bool IsCmpConstant = false;
2022 bool IsCmpMinimum = false;
2023 // Only custom select scalar second operand.
2024 if (Src2.getValueType() != XLenVT)
2025 break;
2026 // Small constants are handled with patterns.
2027 MVT Src1VT = Src1.getSimpleValueType();
2028 int64_t CVal = 0;
2029 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2030 IsCmpConstant = true;
2031 CVal = C->getSExtValue();
2032 if (CVal >= -15 && CVal <= 16) {
2033 if (!IsUnsigned || CVal != 0)
2034 break;
2035 IsCmpMinimum = true;
2036 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2037 Src1VT.getScalarSizeInBits())
2038 .getSExtValue()) {
2039 IsCmpMinimum = true;
2040 }
2041 }
2042 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2043 VMOROpcode, VMSGTMaskOpcode;
2044 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2045 default:
2046 llvm_unreachable("Unexpected LMUL!");
2047#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2048 case RISCVVType::lmulenum: \
2049 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2050 : RISCV::PseudoVMSLT_VX_##suffix; \
2051 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2052 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2053 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2054 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2055 break;
2056 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2057 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2058 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2059 CASE_VMSLT_OPCODES(LMUL_1, M1)
2060 CASE_VMSLT_OPCODES(LMUL_2, M2)
2061 CASE_VMSLT_OPCODES(LMUL_4, M4)
2062 CASE_VMSLT_OPCODES(LMUL_8, M8)
2063#undef CASE_VMSLT_OPCODES
2064 }
2065 // Mask operations use the LMUL from the mask type.
2066 switch (RISCVTargetLowering::getLMUL(VT)) {
2067 default:
2068 llvm_unreachable("Unexpected LMUL!");
2069#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2070 case RISCVVType::lmulenum: \
2071 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2072 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2073 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2074 break;
2075 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2076 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2077 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2078      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2079      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2080      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2081      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2082#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2083 }
2084 SDValue SEW = CurDAG->getTargetConstant(
2085 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2086 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2087 SDValue VL;
2088 selectVLOp(Node->getOperand(5), VL);
2089 SDValue MaskedOff = Node->getOperand(1);
2090 SDValue Mask = Node->getOperand(4);
2091
2092 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2093 if (IsCmpMinimum) {
2094 // We don't need vmor if the MaskedOff and the Mask are the same
2095 // value.
2096 if (Mask == MaskedOff) {
2097 ReplaceUses(Node, Mask.getNode());
2098 return;
2099 }
2100        ReplaceNode(Node,
2101                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
2102 {Mask, MaskedOff, VL, MaskSEW}));
2103 return;
2104 }
2105
2106 // If the MaskedOff value and the Mask are the same value use
2107 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2108 // This avoids needing to copy v0 to vd before starting the next sequence.
2109 if (Mask == MaskedOff) {
2110 SDValue Cmp = SDValue(
2111 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2112 0);
2113 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2114 {Mask, Cmp, VL, MaskSEW}));
2115 return;
2116 }
2117
2118 SDValue PolicyOp =
2119 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2120
2121 if (IsCmpConstant) {
2122 SDValue Imm =
2123 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2124
2125 ReplaceNode(Node, CurDAG->getMachineNode(
2126 VMSGTMaskOpcode, DL, VT,
2127 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2128 return;
2129 }
2130
2131 // Otherwise use
2132 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2133 // The result is mask undisturbed.
2134 // We use the same instructions to emulate mask agnostic behavior, because
2135 // the agnostic result can be either undisturbed or all 1.
2136 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2137 {MaskedOff, Src1, Src2, Mask,
2138 VL, SEW, PolicyOp}),
2139 0);
2140 // vmxor.mm vd, vd, v0 is used to update active value.
2141 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2142 {Cmp, Mask, VL, MaskSEW}));
2143 return;
2144 }
2145 case Intrinsic::riscv_vsetvli:
2146 case Intrinsic::riscv_vsetvlimax:
2147 return selectVSETVLI(Node);
2148 case Intrinsic::riscv_sf_vsettnt:
2149 case Intrinsic::riscv_sf_vsettm:
2150 case Intrinsic::riscv_sf_vsettk:
2151 return selectXSfmmVSET(Node);
2152 }
2153 break;
2154 }
2155  case ISD::INTRINSIC_W_CHAIN: {
2156    unsigned IntNo = Node->getConstantOperandVal(1);
2157 switch (IntNo) {
2158 // By default we do not custom select any intrinsic.
2159 default:
2160 break;
2161 case Intrinsic::riscv_vlseg2:
2162 case Intrinsic::riscv_vlseg3:
2163 case Intrinsic::riscv_vlseg4:
2164 case Intrinsic::riscv_vlseg5:
2165 case Intrinsic::riscv_vlseg6:
2166 case Intrinsic::riscv_vlseg7:
2167 case Intrinsic::riscv_vlseg8: {
2168 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2169 /*IsStrided*/ false);
2170 return;
2171 }
2172 case Intrinsic::riscv_vlseg2_mask:
2173 case Intrinsic::riscv_vlseg3_mask:
2174 case Intrinsic::riscv_vlseg4_mask:
2175 case Intrinsic::riscv_vlseg5_mask:
2176 case Intrinsic::riscv_vlseg6_mask:
2177 case Intrinsic::riscv_vlseg7_mask:
2178 case Intrinsic::riscv_vlseg8_mask: {
2179 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2180 /*IsStrided*/ false);
2181 return;
2182 }
2183 case Intrinsic::riscv_vlsseg2:
2184 case Intrinsic::riscv_vlsseg3:
2185 case Intrinsic::riscv_vlsseg4:
2186 case Intrinsic::riscv_vlsseg5:
2187 case Intrinsic::riscv_vlsseg6:
2188 case Intrinsic::riscv_vlsseg7:
2189 case Intrinsic::riscv_vlsseg8: {
2190 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2191 /*IsStrided*/ true);
2192 return;
2193 }
2194 case Intrinsic::riscv_vlsseg2_mask:
2195 case Intrinsic::riscv_vlsseg3_mask:
2196 case Intrinsic::riscv_vlsseg4_mask:
2197 case Intrinsic::riscv_vlsseg5_mask:
2198 case Intrinsic::riscv_vlsseg6_mask:
2199 case Intrinsic::riscv_vlsseg7_mask:
2200 case Intrinsic::riscv_vlsseg8_mask: {
2201 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2202 /*IsStrided*/ true);
2203 return;
2204 }
2205 case Intrinsic::riscv_vloxseg2:
2206 case Intrinsic::riscv_vloxseg3:
2207 case Intrinsic::riscv_vloxseg4:
2208 case Intrinsic::riscv_vloxseg5:
2209 case Intrinsic::riscv_vloxseg6:
2210 case Intrinsic::riscv_vloxseg7:
2211 case Intrinsic::riscv_vloxseg8:
2212 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2213 /*IsOrdered*/ true);
2214 return;
2215 case Intrinsic::riscv_vluxseg2:
2216 case Intrinsic::riscv_vluxseg3:
2217 case Intrinsic::riscv_vluxseg4:
2218 case Intrinsic::riscv_vluxseg5:
2219 case Intrinsic::riscv_vluxseg6:
2220 case Intrinsic::riscv_vluxseg7:
2221 case Intrinsic::riscv_vluxseg8:
2222 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2223 /*IsOrdered*/ false);
2224 return;
2225 case Intrinsic::riscv_vloxseg2_mask:
2226 case Intrinsic::riscv_vloxseg3_mask:
2227 case Intrinsic::riscv_vloxseg4_mask:
2228 case Intrinsic::riscv_vloxseg5_mask:
2229 case Intrinsic::riscv_vloxseg6_mask:
2230 case Intrinsic::riscv_vloxseg7_mask:
2231 case Intrinsic::riscv_vloxseg8_mask:
2232 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2233 /*IsOrdered*/ true);
2234 return;
2235 case Intrinsic::riscv_vluxseg2_mask:
2236 case Intrinsic::riscv_vluxseg3_mask:
2237 case Intrinsic::riscv_vluxseg4_mask:
2238 case Intrinsic::riscv_vluxseg5_mask:
2239 case Intrinsic::riscv_vluxseg6_mask:
2240 case Intrinsic::riscv_vluxseg7_mask:
2241 case Intrinsic::riscv_vluxseg8_mask:
2242 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2243 /*IsOrdered*/ false);
2244 return;
2245 case Intrinsic::riscv_vlseg8ff:
2246 case Intrinsic::riscv_vlseg7ff:
2247 case Intrinsic::riscv_vlseg6ff:
2248 case Intrinsic::riscv_vlseg5ff:
2249 case Intrinsic::riscv_vlseg4ff:
2250 case Intrinsic::riscv_vlseg3ff:
2251 case Intrinsic::riscv_vlseg2ff: {
2252 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2253 return;
2254 }
2255 case Intrinsic::riscv_vlseg8ff_mask:
2256 case Intrinsic::riscv_vlseg7ff_mask:
2257 case Intrinsic::riscv_vlseg6ff_mask:
2258 case Intrinsic::riscv_vlseg5ff_mask:
2259 case Intrinsic::riscv_vlseg4ff_mask:
2260 case Intrinsic::riscv_vlseg3ff_mask:
2261 case Intrinsic::riscv_vlseg2ff_mask: {
2262 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2263 return;
2264 }
2265 case Intrinsic::riscv_vloxei:
2266 case Intrinsic::riscv_vloxei_mask:
2267 case Intrinsic::riscv_vluxei:
2268 case Intrinsic::riscv_vluxei_mask: {
2269 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2270 IntNo == Intrinsic::riscv_vluxei_mask;
2271 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2272 IntNo == Intrinsic::riscv_vloxei_mask;
2273
2274 MVT VT = Node->getSimpleValueType(0);
2275 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2276
2277 unsigned CurOp = 2;
2278 SmallVector<SDValue, 8> Operands;
2279 Operands.push_back(Node->getOperand(CurOp++));
2280
2281 MVT IndexVT;
2282 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2283 /*IsStridedOrIndexed*/ true, Operands,
2284 /*IsLoad=*/true, &IndexVT);
2285
2287 "Element count mismatch");
2288
2289      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2290      RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2291      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2292 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2293 reportFatalUsageError("The V extension does not support EEW=64 for "
2294 "index values when XLEN=32");
2295 }
2296 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2297 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2298 static_cast<unsigned>(IndexLMUL));
2299 MachineSDNode *Load =
2300 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2301
2302 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2303
2304 ReplaceNode(Node, Load);
2305 return;
2306 }
2307 case Intrinsic::riscv_vlm:
2308 case Intrinsic::riscv_vle:
2309 case Intrinsic::riscv_vle_mask:
2310 case Intrinsic::riscv_vlse:
2311 case Intrinsic::riscv_vlse_mask: {
2312 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2313 IntNo == Intrinsic::riscv_vlse_mask;
2314 bool IsStrided =
2315 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2316
2317 MVT VT = Node->getSimpleValueType(0);
2318 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2319
2320      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2321      // operand at the IR level. In pseudos, it has both policy and
2322      // passthru operands. The passthru operand is needed to track the
2323      // "tail undefined" state, and the policy is there just
2324      // for consistency - it will always be "don't care" for the
2325      // unmasked form.
2326 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2327 unsigned CurOp = 2;
2328 SmallVector<SDValue, 8> Operands;
2329 if (HasPassthruOperand)
2330 Operands.push_back(Node->getOperand(CurOp++));
2331 else {
2332 // We eagerly lower to implicit_def (instead of undef), as we
2333 // otherwise fail to select nodes such as: nxv1i1 = undef
2334 SDNode *Passthru =
2335 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2336 Operands.push_back(SDValue(Passthru, 0));
2337 }
2338 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2339 Operands, /*IsLoad=*/true);
2340
2341      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2342      const RISCV::VLEPseudo *P =
2343 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2344 static_cast<unsigned>(LMUL));
2345 MachineSDNode *Load =
2346 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2347
2348 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2349
2350 ReplaceNode(Node, Load);
2351 return;
2352 }
2353 case Intrinsic::riscv_vleff:
2354 case Intrinsic::riscv_vleff_mask: {
2355 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2356
2357 MVT VT = Node->getSimpleValueType(0);
2358 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2359
2360 unsigned CurOp = 2;
2361 SmallVector<SDValue, 7> Operands;
2362 Operands.push_back(Node->getOperand(CurOp++));
2363 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2364 /*IsStridedOrIndexed*/ false, Operands,
2365 /*IsLoad=*/true);
2366
2367      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2368      const RISCV::VLEPseudo *P =
2369 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2370 Log2SEW, static_cast<unsigned>(LMUL));
2371 MachineSDNode *Load = CurDAG->getMachineNode(
2372 P->Pseudo, DL, Node->getVTList(), Operands);
2373 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2374
2375 ReplaceNode(Node, Load);
2376 return;
2377 }
2378 case Intrinsic::riscv_nds_vln:
2379 case Intrinsic::riscv_nds_vln_mask:
2380 case Intrinsic::riscv_nds_vlnu:
2381 case Intrinsic::riscv_nds_vlnu_mask: {
2382 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2383 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2384 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2385 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2386
2387 MVT VT = Node->getSimpleValueType(0);
2388 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2389 unsigned CurOp = 2;
2390 SmallVector<SDValue, 8> Operands;
2391
2392 Operands.push_back(Node->getOperand(CurOp++));
2393 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2394 /*IsStridedOrIndexed=*/false, Operands,
2395 /*IsLoad=*/true);
2396
2397      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2398      const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2399 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2400 MachineSDNode *Load =
2401 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2402
2403 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2404 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2405
2406 ReplaceNode(Node, Load);
2407 return;
2408 }
2409 }
2410 break;
2411 }
2412 case ISD::INTRINSIC_VOID: {
2413 unsigned IntNo = Node->getConstantOperandVal(1);
2414 switch (IntNo) {
2415 case Intrinsic::riscv_vsseg2:
2416 case Intrinsic::riscv_vsseg3:
2417 case Intrinsic::riscv_vsseg4:
2418 case Intrinsic::riscv_vsseg5:
2419 case Intrinsic::riscv_vsseg6:
2420 case Intrinsic::riscv_vsseg7:
2421 case Intrinsic::riscv_vsseg8: {
2422 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2423 /*IsStrided*/ false);
2424 return;
2425 }
2426 case Intrinsic::riscv_vsseg2_mask:
2427 case Intrinsic::riscv_vsseg3_mask:
2428 case Intrinsic::riscv_vsseg4_mask:
2429 case Intrinsic::riscv_vsseg5_mask:
2430 case Intrinsic::riscv_vsseg6_mask:
2431 case Intrinsic::riscv_vsseg7_mask:
2432 case Intrinsic::riscv_vsseg8_mask: {
2433 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2434 /*IsStrided*/ false);
2435 return;
2436 }
2437 case Intrinsic::riscv_vssseg2:
2438 case Intrinsic::riscv_vssseg3:
2439 case Intrinsic::riscv_vssseg4:
2440 case Intrinsic::riscv_vssseg5:
2441 case Intrinsic::riscv_vssseg6:
2442 case Intrinsic::riscv_vssseg7:
2443 case Intrinsic::riscv_vssseg8: {
2444 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2445 /*IsStrided*/ true);
2446 return;
2447 }
2448 case Intrinsic::riscv_vssseg2_mask:
2449 case Intrinsic::riscv_vssseg3_mask:
2450 case Intrinsic::riscv_vssseg4_mask:
2451 case Intrinsic::riscv_vssseg5_mask:
2452 case Intrinsic::riscv_vssseg6_mask:
2453 case Intrinsic::riscv_vssseg7_mask:
2454 case Intrinsic::riscv_vssseg8_mask: {
2455 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2456 /*IsStrided*/ true);
2457 return;
2458 }
2459 case Intrinsic::riscv_vsoxseg2:
2460 case Intrinsic::riscv_vsoxseg3:
2461 case Intrinsic::riscv_vsoxseg4:
2462 case Intrinsic::riscv_vsoxseg5:
2463 case Intrinsic::riscv_vsoxseg6:
2464 case Intrinsic::riscv_vsoxseg7:
2465 case Intrinsic::riscv_vsoxseg8:
2466 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2467 /*IsOrdered*/ true);
2468 return;
2469 case Intrinsic::riscv_vsuxseg2:
2470 case Intrinsic::riscv_vsuxseg3:
2471 case Intrinsic::riscv_vsuxseg4:
2472 case Intrinsic::riscv_vsuxseg5:
2473 case Intrinsic::riscv_vsuxseg6:
2474 case Intrinsic::riscv_vsuxseg7:
2475 case Intrinsic::riscv_vsuxseg8:
2476 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2477 /*IsOrdered*/ false);
2478 return;
2479 case Intrinsic::riscv_vsoxseg2_mask:
2480 case Intrinsic::riscv_vsoxseg3_mask:
2481 case Intrinsic::riscv_vsoxseg4_mask:
2482 case Intrinsic::riscv_vsoxseg5_mask:
2483 case Intrinsic::riscv_vsoxseg6_mask:
2484 case Intrinsic::riscv_vsoxseg7_mask:
2485 case Intrinsic::riscv_vsoxseg8_mask:
2486 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2487 /*IsOrdered*/ true);
2488 return;
2489 case Intrinsic::riscv_vsuxseg2_mask:
2490 case Intrinsic::riscv_vsuxseg3_mask:
2491 case Intrinsic::riscv_vsuxseg4_mask:
2492 case Intrinsic::riscv_vsuxseg5_mask:
2493 case Intrinsic::riscv_vsuxseg6_mask:
2494 case Intrinsic::riscv_vsuxseg7_mask:
2495 case Intrinsic::riscv_vsuxseg8_mask:
2496 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2497 /*IsOrdered*/ false);
2498 return;
2499 case Intrinsic::riscv_vsoxei:
2500 case Intrinsic::riscv_vsoxei_mask:
2501 case Intrinsic::riscv_vsuxei:
2502 case Intrinsic::riscv_vsuxei_mask: {
2503 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2504 IntNo == Intrinsic::riscv_vsuxei_mask;
2505 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2506 IntNo == Intrinsic::riscv_vsoxei_mask;
2507
2508 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2509 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2510
2511 unsigned CurOp = 2;
2512 SmallVector<SDValue, 8> Operands;
2513 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2514
2515 MVT IndexVT;
2516 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2517 /*IsStridedOrIndexed*/ true, Operands,
2518 /*IsLoad=*/false, &IndexVT);
2519
2521 "Element count mismatch");
2522
2523      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2524      RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2525      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2526 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2527 reportFatalUsageError("The V extension does not support EEW=64 for "
2528 "index values when XLEN=32");
2529 }
2530 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2531 IsMasked, IsOrdered, IndexLog2EEW,
2532 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2533 MachineSDNode *Store =
2534 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2535
2536 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2537
2538 ReplaceNode(Node, Store);
2539 return;
2540 }
2541 case Intrinsic::riscv_vsm:
2542 case Intrinsic::riscv_vse:
2543 case Intrinsic::riscv_vse_mask:
2544 case Intrinsic::riscv_vsse:
2545 case Intrinsic::riscv_vsse_mask: {
2546 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2547 IntNo == Intrinsic::riscv_vsse_mask;
2548 bool IsStrided =
2549 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2550
2551 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2552 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2553
2554 unsigned CurOp = 2;
2555 SmallVector<SDValue, 8> Operands;
2556 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2557
2558 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2559 Operands);
2560
2561      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2562      const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2563 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2564 MachineSDNode *Store =
2565 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2566 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2567
2568 ReplaceNode(Node, Store);
2569 return;
2570 }
2571 case Intrinsic::riscv_sf_vc_x_se:
2572 case Intrinsic::riscv_sf_vc_i_se:
2573      selectSF_VC_X_SE(Node);
2574      return;
2575 case Intrinsic::riscv_sf_vlte8:
2576 case Intrinsic::riscv_sf_vlte16:
2577 case Intrinsic::riscv_sf_vlte32:
2578 case Intrinsic::riscv_sf_vlte64: {
2579 unsigned Log2SEW;
2580 unsigned PseudoInst;
2581 switch (IntNo) {
2582 case Intrinsic::riscv_sf_vlte8:
2583 PseudoInst = RISCV::PseudoSF_VLTE8;
2584 Log2SEW = 3;
2585 break;
2586 case Intrinsic::riscv_sf_vlte16:
2587 PseudoInst = RISCV::PseudoSF_VLTE16;
2588 Log2SEW = 4;
2589 break;
2590 case Intrinsic::riscv_sf_vlte32:
2591 PseudoInst = RISCV::PseudoSF_VLTE32;
2592 Log2SEW = 5;
2593 break;
2594 case Intrinsic::riscv_sf_vlte64:
2595 PseudoInst = RISCV::PseudoSF_VLTE64;
2596 Log2SEW = 6;
2597 break;
2598 }
2599
2600 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2601 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2602 SDValue Operands[] = {Node->getOperand(2),
2603 Node->getOperand(3),
2604 Node->getOperand(4),
2605 SEWOp,
2606 TWidenOp,
2607 Node->getOperand(0)};
2608
2609 MachineSDNode *TileLoad =
2610 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2611 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2612 CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()});
2613
2614 ReplaceNode(Node, TileLoad);
2615 return;
2616 }
2617 case Intrinsic::riscv_sf_mm_s_s:
2618 case Intrinsic::riscv_sf_mm_s_u:
2619 case Intrinsic::riscv_sf_mm_u_s:
2620 case Intrinsic::riscv_sf_mm_u_u:
2621 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2622 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2623 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2624 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2625 case Intrinsic::riscv_sf_mm_f_f: {
2626 bool HasFRM = false;
2627 unsigned PseudoInst;
2628 switch (IntNo) {
2629 case Intrinsic::riscv_sf_mm_s_s:
2630 PseudoInst = RISCV::PseudoSF_MM_S_S;
2631 break;
2632 case Intrinsic::riscv_sf_mm_s_u:
2633 PseudoInst = RISCV::PseudoSF_MM_S_U;
2634 break;
2635 case Intrinsic::riscv_sf_mm_u_s:
2636 PseudoInst = RISCV::PseudoSF_MM_U_S;
2637 break;
2638 case Intrinsic::riscv_sf_mm_u_u:
2639 PseudoInst = RISCV::PseudoSF_MM_U_U;
2640 break;
2641 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2642 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2643 HasFRM = true;
2644 break;
2645 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2646 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2647 HasFRM = true;
2648 break;
2649 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2650 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2651 HasFRM = true;
2652 break;
2653 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2654 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2655 HasFRM = true;
2656 break;
2657 case Intrinsic::riscv_sf_mm_f_f:
2658 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2659 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2660 else
2661 PseudoInst = RISCV::PseudoSF_MM_F_F;
2662 HasFRM = true;
2663 break;
2664 }
2665 uint64_t TileNum = Node->getConstantOperandVal(2);
2666 SDValue Op1 = Node->getOperand(3);
2667 SDValue Op2 = Node->getOperand(4);
2668 MVT VT = Op1->getSimpleValueType(0);
2669 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2670 SDValue TmOp = Node->getOperand(5);
2671 SDValue TnOp = Node->getOperand(6);
2672 SDValue TkOp = Node->getOperand(7);
2673 SDValue TWidenOp = Node->getOperand(8);
2674 SDValue Chain = Node->getOperand(0);
2675
2676 // sf.mm.f.f with sew=32, twiden=2 is invalid
2677 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2678 TWidenOp->getAsZExtVal() == 2)
2679 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2680
2681 SmallVector<SDValue, 10> Operands(
2682 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2683 if (HasFRM)
2684 Operands.push_back(
2685 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2686 Operands.append({TmOp, TnOp, TkOp,
2687 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2688 Chain});
2689
2690 auto *NewNode =
2691 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2692
2693 ReplaceNode(Node, NewNode);
2694 return;
2695 }
2696 case Intrinsic::riscv_sf_vtzero_t: {
2697 uint64_t TileNum = Node->getConstantOperandVal(2);
2698 SDValue Tm = Node->getOperand(3);
2699 SDValue Tn = Node->getOperand(4);
2700 SDValue Log2SEW = Node->getOperand(5);
2701 SDValue TWiden = Node->getOperand(6);
2702 SDValue Chain = Node->getOperand(0);
2703 auto *NewNode = CurDAG->getMachineNode(
2704 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2705 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2706 TWiden, Chain});
2707
2708 ReplaceNode(Node, NewNode);
2709 return;
2710 }
2711 }
2712 break;
2713 }
2714 case ISD::BITCAST: {
2715 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2716 // Just drop bitcasts between vectors if both are fixed or both are
2717 // scalable.
2718 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2719 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2720 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2721 CurDAG->RemoveDeadNode(Node);
2722 return;
2723 }
2724 if (Subtarget->enablePExtCodeGen()) {
2725 bool Is32BitCast =
2726 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2727 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2728 bool Is64BitCast =
2729 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2730 SrcVT == MVT::v2i32)) ||
2731 (SrcVT == MVT::i64 &&
2732 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2733 if (Is32BitCast || Is64BitCast) {
2734 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2735 CurDAG->RemoveDeadNode(Node);
2736 return;
2737 }
2738 }
2739 break;
2740 }
2742 if (Subtarget->enablePExtCodeGen()) {
2743 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2744 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
2745 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
2746 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2747 CurDAG->RemoveDeadNode(Node);
2748 return;
2749 }
2750 }
2751 break;
2752  case ISD::INSERT_SUBVECTOR:
2753  case RISCVISD::TUPLE_INSERT: {
2754 SDValue V = Node->getOperand(0);
2755 SDValue SubV = Node->getOperand(1);
2756 SDLoc DL(SubV);
2757 auto Idx = Node->getConstantOperandVal(2);
2758 MVT SubVecVT = SubV.getSimpleValueType();
2759
2760 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2761 MVT SubVecContainerVT = SubVecVT;
2762 // Establish the correct scalable-vector types for any fixed-length type.
2763 if (SubVecVT.isFixedLengthVector()) {
2764 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2765      TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2766      [[maybe_unused]] bool ExactlyVecRegSized =
2767 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2768 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2769 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2770 .getKnownMinValue()));
2771 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2772 }
2773 MVT ContainerVT = VT;
2774 if (VT.isFixedLengthVector())
2775 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2776
2777 const auto *TRI = Subtarget->getRegisterInfo();
2778 unsigned SubRegIdx;
2779 std::tie(SubRegIdx, Idx) =
2780        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2781            ContainerVT, SubVecContainerVT, Idx, TRI);
2782
2783 // If the Idx hasn't been completely eliminated then this is a subvector
2784 // insert which doesn't naturally align to a vector register. These must
2785 // be handled using instructions to manipulate the vector registers.
2786 if (Idx != 0)
2787 break;
2788
2789 RISCVVType::VLMUL SubVecLMUL =
2790 RISCVTargetLowering::getLMUL(SubVecContainerVT);
2791 [[maybe_unused]] bool IsSubVecPartReg =
2792 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2793 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2794 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2795 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2796 V.isUndef()) &&
2797 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2798 "the subvector is smaller than a full-sized register");
2799
2800 // If we haven't set a SubRegIdx, then we must be going between
2801 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2802 if (SubRegIdx == RISCV::NoSubRegister) {
2803      unsigned InRegClassID =
2804          RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2805      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2806                 InRegClassID &&
2807             "Unexpected subvector extraction");
2808 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2809 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2810 DL, VT, SubV, RC);
2811 ReplaceNode(Node, NewNode);
2812 return;
2813 }
2814
2815 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2816 ReplaceNode(Node, Insert.getNode());
2817 return;
2818 }
2819  case ISD::EXTRACT_SUBVECTOR:
2820  case RISCVISD::TUPLE_EXTRACT: {
2821 SDValue V = Node->getOperand(0);
2822 auto Idx = Node->getConstantOperandVal(1);
2823 MVT InVT = V.getSimpleValueType();
2824 SDLoc DL(V);
2825
2826 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2827 MVT SubVecContainerVT = VT;
2828 // Establish the correct scalable-vector types for any fixed-length type.
2829 if (VT.isFixedLengthVector()) {
2830 assert(Idx == 0);
2831 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2832 }
2833 if (InVT.isFixedLengthVector())
2834 InVT = TLI.getContainerForFixedLengthVector(InVT);
2835
2836 const auto *TRI = Subtarget->getRegisterInfo();
2837 unsigned SubRegIdx;
2838 std::tie(SubRegIdx, Idx) =
2839        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2840            InVT, SubVecContainerVT, Idx, TRI);
2841
2842 // If the Idx hasn't been completely eliminated then this is a subvector
2843 // extract which doesn't naturally align to a vector register. These must
2844 // be handled using instructions to manipulate the vector registers.
2845 if (Idx != 0)
2846 break;
2847
2848 // If we haven't set a SubRegIdx, then we must be going between
2849 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2850 if (SubRegIdx == RISCV::NoSubRegister) {
2851      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2852      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2853                 InRegClassID &&
2854             "Unexpected subvector extraction");
2855 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2856 SDNode *NewNode =
2857 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2858 ReplaceNode(Node, NewNode);
2859 return;
2860 }
2861
2862 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2863 ReplaceNode(Node, Extract.getNode());
2864 return;
2865 }
2866 case RISCVISD::VMV_S_X_VL:
2867 case RISCVISD::VFMV_S_F_VL:
2868 case RISCVISD::VMV_V_X_VL:
2869 case RISCVISD::VFMV_V_F_VL: {
2870 // Try to match splat of a scalar load to a strided load with stride of x0.
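    // A vlse with stride x0 re-reads the same scalar element for every lane,
    // which has the same effect as splatting the loaded value.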
2871 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2872 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2873 if (!Node->getOperand(0).isUndef())
2874 break;
2875 SDValue Src = Node->getOperand(1);
2876 auto *Ld = dyn_cast<LoadSDNode>(Src);
2877    // Can't fold a load update node because its second
2878    // output is used, so the load update node can't be removed.
2879 if (!Ld || Ld->isIndexed())
2880 break;
2881 EVT MemVT = Ld->getMemoryVT();
2882 // The memory VT should be the same size as the element type.
2883 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2884 break;
2885 if (!IsProfitableToFold(Src, Node, Node) ||
2886 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2887 break;
2888
2889 SDValue VL;
2890 if (IsScalarMove) {
2891 // We could deal with more VL if we update the VSETVLI insert pass to
2892 // avoid introducing more VSETVLI.
2893 if (!isOneConstant(Node->getOperand(2)))
2894 break;
2895 selectVLOp(Node->getOperand(2), VL);
2896 } else
2897 selectVLOp(Node->getOperand(2), VL);
2898
2899 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2900 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2901
2902 // If VL=1, then we don't need to do a strided load and can just do a
2903 // regular load.
2904 bool IsStrided = !isOneConstant(VL);
2905
2906    // Only do a strided load if we have an optimized zero-stride vector load.
2907 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2908 break;
2909
2910 SmallVector<SDValue> Operands = {
2911 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2912 Ld->getBasePtr()};
2913 if (IsStrided)
2914 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2915    uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
2916    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2917 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2918
2919    RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2920    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2921 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2922 Log2SEW, static_cast<unsigned>(LMUL));
2923 MachineSDNode *Load =
2924 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2925 // Update the chain.
2926 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2927 // Record the mem-refs
2928 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2929 // Replace the splat with the vlse.
2930 ReplaceNode(Node, Load);
2931 return;
2932 }
2933 case ISD::PREFETCH:
2934 unsigned Locality = Node->getConstantOperandVal(3);
2935 if (Locality > 2)
2936 break;
2937
2938 auto *LoadStoreMem = cast<MemSDNode>(Node);
2939 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2940    MMO->setFlags(MachineMemOperand::MONonTemporal);
2941
2942 int NontemporalLevel = 0;
2943 switch (Locality) {
2944 case 0:
2945 NontemporalLevel = 3; // NTL.ALL
2946 break;
2947 case 1:
2948 NontemporalLevel = 1; // NTL.PALL
2949 break;
2950 case 2:
2951 NontemporalLevel = 0; // NTL.P1
2952 break;
2953 default:
2954 llvm_unreachable("unexpected locality value.");
2955 }
2956
2957 if (NontemporalLevel & 0b1)
2958      MMO->setFlags(MONontemporalBit0);
2959    if (NontemporalLevel & 0b10)
2960      MMO->setFlags(MONontemporalBit1);
2961    break;
2962 }
2963
2964 // Select the default instruction.
2965 SelectCode(Node);
2966}
2967
2968bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2969    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2970 std::vector<SDValue> &OutOps) {
2971 // Always produce a register and immediate operand, as expected by
2972 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2973 switch (ConstraintID) {
2974  case InlineAsm::ConstraintCode::o:
2975  case InlineAsm::ConstraintCode::m: {
2976    SDValue Op0, Op1;
2977 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2978 assert(Found && "SelectAddrRegImm should always succeed");
2979 OutOps.push_back(Op0);
2980 OutOps.push_back(Op1);
2981 return false;
2982 }
2983  case InlineAsm::ConstraintCode::A:
2984    OutOps.push_back(Op);
2985 OutOps.push_back(
2986 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2987 return false;
2988 default:
2989 report_fatal_error("Unexpected asm memory constraint " +
2990 InlineAsm::getMemConstraintName(ConstraintID));
2991 }
2992
2993 return true;
2994}
2995
2996bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2997                                             SDValue &Offset) {
2998 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2999 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
3000 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
3001 return true;
3002 }
3003
3004 return false;
3005}
3006
3007// Fold constant addresses.
3008static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3009 const MVT VT, const RISCVSubtarget *Subtarget,
3010                               SDValue Addr, SDValue &Base, SDValue &Offset,
3011                               bool IsPrefetch = false) {
3012 if (!isa<ConstantSDNode>(Addr))
3013 return false;
3014
3015 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3016
3017 // If the constant is a simm12, we can fold the whole constant and use X0 as
3018 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3019 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
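  // For example, for the constant address 0x12345678: Lo12 = 0x678 and
  // Hi = 0x12345000, so the base is (LUI 0x12345) and the folded offset is
  // 0x678.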
3020 int64_t Lo12 = SignExtend64<12>(CVal);
3021 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3022 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3023 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3024 return false;
3025 if (Hi) {
3026 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3027 Base = SDValue(
3028 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3029 CurDAG->getTargetConstant(Hi20, DL, VT)),
3030 0);
3031 } else {
3032 Base = CurDAG->getRegister(RISCV::X0, VT);
3033 }
3034 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3035 return true;
3036 }
3037
3038 // Ask how constant materialization would handle this constant.
3039 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3040
3041 // If the last instruction would be an ADDI, we can fold its immediate and
3042 // emit the rest of the sequence as the base.
3043 if (Seq.back().getOpcode() != RISCV::ADDI)
3044 return false;
3045 Lo12 = Seq.back().getImm();
3046 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3047 return false;
3048
3049 // Drop the last instruction.
3050 Seq.pop_back();
3051 assert(!Seq.empty() && "Expected more instructions in sequence");
3052
3053 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3054 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3055 return true;
3056}
3057
3058// Is this ADD instruction only used as the base pointer of scalar loads and
3059// stores?
3060static bool isWorthFoldingAdd(SDValue Add) {
3061  for (auto *User : Add->users()) {
3062 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3063 User->getOpcode() != RISCVISD::LD_RV32 &&
3064 User->getOpcode() != RISCVISD::SD_RV32 &&
3065 User->getOpcode() != ISD::ATOMIC_LOAD &&
3066 User->getOpcode() != ISD::ATOMIC_STORE)
3067 return false;
3068 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3069 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3070 VT != MVT::f64)
3071 return false;
3072 // Don't allow stores of the value. It must be used as the address.
3073 if (User->getOpcode() == ISD::STORE &&
3074 cast<StoreSDNode>(User)->getValue() == Add)
3075 return false;
3076 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3077 cast<AtomicSDNode>(User)->getVal() == Add)
3078 return false;
3079 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3080 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3081 return false;
3082 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3083 return false;
3084 }
3085
3086 return true;
3087}
3088
3089static bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3090  switch (User->getOpcode()) {
3091 default:
3092 return false;
3093 case ISD::LOAD:
3094 case RISCVISD::LD_RV32:
3095 case ISD::ATOMIC_LOAD:
3096 break;
3097 case ISD::STORE:
3098 // Don't allow stores of Add. It must only be used as the address.
3099 if (cast<StoreSDNode>(User)->getValue() == Add)
3100 return false;
3101 break;
3102 case RISCVISD::SD_RV32:
3103 // Don't allow stores of Add. It must only be used as the address.
3104 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3105 return false;
3106 break;
3107 case ISD::ATOMIC_STORE:
3108 // Don't allow stores of Add. It must only be used as the address.
3109 if (cast<AtomicSDNode>(User)->getVal() == Add)
3110 return false;
3111 break;
3112 }
3113
3114 return true;
3115}
3116
3117// To prevent SelectAddrRegImm from folding offsets that conflict with the
3118// fusion of PseudoMovAddr, check if the offset of every use of a given address
3119// is within the alignment.
3120bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3121                                                  Align Alignment) {
3122 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3123 for (auto *User : Addr->users()) {
3124 // If the user is a load or store, then the offset is 0 which is always
3125 // within alignment.
3126 if (isRegImmLoadOrStore(User, Addr))
3127 continue;
3128
3129 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3130 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3131 if (!isInt<12>(CVal) || Alignment <= CVal)
3132 return false;
3133
3134 // Make sure all uses are foldable load/stores.
3135 for (auto *AddUser : User->users())
3136 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3137 return false;
3138
3139 continue;
3140 }
3141
3142 return false;
3143 }
3144
3145 return true;
3146}
3147
3148bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
3149                                         SDValue &Offset) {
3150 if (SelectAddrFrameIndex(Addr, Base, Offset))
3151 return true;
3152
3153 SDLoc DL(Addr);
3154 MVT VT = Addr.getSimpleValueType();
3155
3156 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3157 bool CanFold = true;
3158    // Unconditionally fold if operand 1 is not a global address (e.g. an
3159    // external symbol).
3160 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3161 const DataLayout &DL = CurDAG->getDataLayout();
3162 Align Alignment = commonAlignment(
3163 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3164 if (!areOffsetsWithinAlignment(Addr, Alignment))
3165 CanFold = false;
3166 }
3167 if (CanFold) {
3168 Base = Addr.getOperand(0);
3169 Offset = Addr.getOperand(1);
3170 return true;
3171 }
3172 }
3173
3174 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3175 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3176 if (isInt<12>(CVal)) {
3177 Base = Addr.getOperand(0);
3178 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3179 SDValue LoOperand = Base.getOperand(1);
3180 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3181 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3182 // (its low part, really), then we can rely on the alignment of that
3183          // variable to provide a margin of safety before the low part can
3184          // overflow the 12 bits of the load/store offset. Check if CVal falls
3185          // within that margin; if so, (low part + CVal) can't overflow.
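          // For example, an 8-byte-aligned global has a low part that is a
          // multiple of 8, so any CVal in [0, 7] can be added without the
          // combined value spilling out of the 12-bit offset field.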
3186 const DataLayout &DL = CurDAG->getDataLayout();
3187 Align Alignment = commonAlignment(
3188 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3189 if ((CVal == 0 || Alignment > CVal) &&
3190 areOffsetsWithinAlignment(Base, Alignment)) {
3191 int64_t CombinedOffset = CVal + GA->getOffset();
3192 Base = Base.getOperand(0);
3193 Offset = CurDAG->getTargetGlobalAddress(
3194 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3195 CombinedOffset, GA->getTargetFlags());
3196 return true;
3197 }
3198 }
3199 }
3200
3201 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3202 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3203 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3204 return true;
3205 }
3206 }
3207
3208 // Handle ADD with large immediates.
3209 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3210 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3211 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3212
3213 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3214 // an ADDI for part of the offset and fold the rest into the load/store.
3215 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
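    // For example, an offset of 3000 becomes (ADDI base, 2047) plus a
    // remaining load/store offset of 953, both of which fit in a simm12.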
3216 if (CVal >= -4096 && CVal <= 4094) {
3217 int64_t Adj = CVal < 0 ? -2048 : 2047;
3218 Base = SDValue(
3219 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3220 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3221 0);
3222 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3223 return true;
3224 }
3225
3226 // For larger immediates, we might be able to save one instruction from
3227 // constant materialization by folding the Lo12 bits of the immediate into
3228 // the address. We should only do this if the ADD is only used by loads and
3229 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3230 // separately with the full materialized immediate creating extra
3231 // instructions.
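    // For example, on RV32 an ADD with immediate 0x12345 can fold the low 12
    // bits (0x345) into each load/store and keep just (LUI 0x12) plus the ADD
    // for the base.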
3232 if (isWorthFoldingAdd(Addr) &&
3233 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3234 Offset, /*IsPrefetch=*/false)) {
3235 // Insert an ADD instruction with the materialized Hi52 bits.
3236 Base = SDValue(
3237 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3238 0);
3239 return true;
3240 }
3241 }
3242
3243 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3244 /*IsPrefetch=*/false))
3245 return true;
3246
3247 Base = Addr;
3248 Offset = CurDAG->getTargetConstant(0, DL, VT);
3249 return true;
3250}
3251
3252/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3254 SDValue &Offset) {
3255 if (SelectAddrFrameIndex(Addr, Base, Offset))
3256 return true;
3257
3258 SDLoc DL(Addr);
3259 MVT VT = Addr.getSimpleValueType();
3260
3261 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3262 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3263 if (isUInt<9>(CVal)) {
3264 Base = Addr.getOperand(0);
3265
3266 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3267 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3268 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3269 return true;
3270 }
3271 }
3272
3273 Base = Addr;
3274 Offset = CurDAG->getTargetConstant(0, DL, VT);
3275 return true;
3276}
3277
3278/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3279/// Offset should be all zeros.
3280bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3281                                                 SDValue &Offset) {
3282 if (SelectAddrFrameIndex(Addr, Base, Offset))
3283 return true;
3284
3285 SDLoc DL(Addr);
3286 MVT VT = Addr.getSimpleValueType();
3287
3288 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3289 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3290 if (isInt<12>(CVal)) {
3291 Base = Addr.getOperand(0);
3292
3293 // Early-out if not a valid offset.
3294 if ((CVal & 0b11111) != 0) {
3295 Base = Addr;
3296 Offset = CurDAG->getTargetConstant(0, DL, VT);
3297 return true;
3298 }
3299
3300 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3301 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3302 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3303 return true;
3304 }
3305 }
3306
3307 // Handle ADD with large immediates.
3308 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3309 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3310 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3311
3312 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3313 // one instruction by folding adjustment (-2048 or 2016) into the address.
3314 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3315 int64_t Adj = CVal < 0 ? -2048 : 2016;
3316 int64_t AdjustedOffset = CVal - Adj;
3317 Base =
3318 SDValue(CurDAG->getMachineNode(
3319 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3320 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3321 0);
3322 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3323 return true;
3324 }
3325
3326 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3327 Offset, /*IsPrefetch=*/true)) {
3328 // Insert an ADD instruction with the materialized Hi52 bits.
3329 Base = SDValue(
3330 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3331 0);
3332 return true;
3333 }
3334 }
3335
3336 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3337 /*IsPrefetch=*/true))
3338 return true;
3339
3340 Base = Addr;
3341 Offset = CurDAG->getTargetConstant(0, DL, VT);
3342 return true;
3343}
3344
3345/// Return true if this is a load/store that we have a RegRegScale instruction for.
3346static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3347 const RISCVSubtarget &Subtarget) {
3348 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3349 return false;
3350 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3351 if (!(VT.isScalarInteger() &&
3352 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3353 !((VT == MVT::f32 || VT == MVT::f64) &&
3354 Subtarget.hasVendorXTHeadFMemIdx()))
3355 return false;
3356 // Don't allow stores of the value. It must be used as the address.
3357 if (User->getOpcode() == ISD::STORE &&
3358 cast<StoreSDNode>(User)->getValue() == Add)
3359 return false;
3360
3361 return true;
3362}
3363
3364/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3365/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3366/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3367/// single addi and we don't have a SHXADD instruction we could use.
3368/// FIXME: May still need to check how many and what kind of users the SHL has.
3369static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3370 SDValue Add,
3371 SDValue Shift = SDValue()) {
3372 bool FoundADDI = false;
3373 for (auto *User : Add->users()) {
3374 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3375 continue;
3376
3377 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3378 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3379 !isa<ConstantSDNode>(User->getOperand(1)) ||
3380 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3381 return false;
3382
3383 FoundADDI = true;
3384
3385 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3386 assert(Shift.getOpcode() == ISD::SHL);
3387 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3388 if (Subtarget.hasShlAdd(ShiftAmt))
3389 return false;
3390
3391 // All users of the ADDI should be load/store.
3392 for (auto *ADDIUser : User->users())
3393 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3394 return false;
3395 }
3396
3397 return true;
3398}
3399
3400bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3401 unsigned MaxShiftAmount,
3402 SDValue &Base, SDValue &Index,
3403 SDValue &Scale) {
3404 if (Addr.getOpcode() != ISD::ADD)
3405 return false;
3406 SDValue LHS = Addr.getOperand(0);
3407 SDValue RHS = Addr.getOperand(1);
3408
3409 EVT VT = Addr.getSimpleValueType();
3410 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3411 SDValue &Shift) {
3412 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3413 return false;
3414
3415 // Only match shifts by a value in range [0, MaxShiftAmount].
3416 unsigned ShiftAmt = N.getConstantOperandVal(1);
3417 if (ShiftAmt > MaxShiftAmount)
3418 return false;
3419
3420 Index = N.getOperand(0);
3421 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3422 return true;
3423 };
3424
3425 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3426 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3427 if (LHS.getOpcode() == ISD::ADD &&
3428 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3429 isInt<12>(C1->getSExtValue())) {
3430 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3431 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3432 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3433 SDLoc(Addr), VT);
3434 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3435 LHS.getOperand(0), C1Val),
3436 0);
3437 return true;
3438 }
3439
3440 // Add is commutative so we need to check both operands.
3441 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3442 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3443 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3444 SDLoc(Addr), VT);
3445 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3446 LHS.getOperand(1), C1Val),
3447 0);
3448 return true;
3449 }
3450 }
3451
3452 // Don't match add with constants.
3453 // FIXME: Is this profitable for large constants that have 0s in the lower
3454 // 12 bits that we can materialize with LUI?
3455 return false;
3456 }
3457
3458 // Try to match a shift on the RHS.
3459 if (SelectShl(RHS, Index, Scale)) {
3460 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3461 return false;
3462 Base = LHS;
3463 return true;
3464 }
3465
3466 // Try to match a shift on the LHS.
3467 if (SelectShl(LHS, Index, Scale)) {
3468 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3469 return false;
3470 Base = RHS;
3471 return true;
3472 }
3473
3474 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3475 return false;
3476
3477 Base = LHS;
3478 Index = RHS;
3479 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3480 return true;
3481}
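A small standalone check (plain C++, not part of this file) that the reassociation matched above, (add (add (shl A, C2), B), C1) -> (add (add B, C1), (shl A, C2)), keeps the address unchanged while exposing a base plus scaled-index form.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t A = 0x1234, B = 0x80000000ULL;
  uint64_t C1 = 100;                        // simm12 constant
  unsigned C2 = 3;                          // scale (shift amount)
  uint64_t Original = ((A << C2) + B) + C1;
  uint64_t Base = B + C1;                   // folded into a single ADDI
  uint64_t RegRegScale = Base + (A << C2);  // base + (index << scale)
  assert(Original == RegRegScale);
  return 0;
}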
3482
3483bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3484 unsigned MaxShiftAmount,
3485 unsigned Bits, SDValue &Base,
3486 SDValue &Index,
3487 SDValue &Scale) {
3488 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3489 return false;
3490
3491 if (Index.getOpcode() == ISD::AND) {
3492 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3493 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3494 Index = Index.getOperand(0);
3495 return true;
3496 }
3497 }
3498
3499 return false;
3500}
3501
3502bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3503 SDValue &Offset) {
3504 if (Addr.getOpcode() != ISD::ADD)
3505 return false;
3506
3507 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3508 return false;
3509
3510 Base = Addr.getOperand(0);
3511 Offset = Addr.getOperand(1);
3512 return true;
3513}
3514
3515bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3516 SDValue &ShAmt) {
3517 ShAmt = N;
3518
3519 // Peek through zext.
3520 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3521 ShAmt = ShAmt.getOperand(0);
3522
3523 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3524 // amount. If there is an AND on the shift amount, we can bypass it if it
3525 // doesn't affect any of those bits.
3526 if (ShAmt.getOpcode() == ISD::AND &&
3527 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3528 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3529
3530 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3531 // mask that covers the bits needed to represent all shift amounts.
3532 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3533 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3534
3535 if (ShMask.isSubsetOf(AndMask)) {
3536 ShAmt = ShAmt.getOperand(0);
3537 } else {
3538 // SimplifyDemandedBits may have optimized the mask so try restoring any
3539 // bits that are known zero.
3540 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3541 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3542 return true;
3543 ShAmt = ShAmt.getOperand(0);
3544 }
3545 }
3546
3547 if (ShAmt.getOpcode() == ISD::ADD &&
3548 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3549 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3550 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3551 // to avoid the ADD.
3552 if (Imm != 0 && Imm % ShiftWidth == 0) {
3553 ShAmt = ShAmt.getOperand(0);
3554 return true;
3555 }
3556 } else if (ShAmt.getOpcode() == ISD::SUB &&
3557 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3558 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3559 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3560 // generate a NEG instead of a SUB of a constant.
3561 if (Imm != 0 && Imm % ShiftWidth == 0) {
3562 SDLoc DL(ShAmt);
3563 EVT VT = ShAmt.getValueType();
3564 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3565 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3566 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3567 ShAmt.getOperand(1));
3568 ShAmt = SDValue(Neg, 0);
3569 return true;
3570 }
3571 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3572 // to generate a NOT instead of a SUB of a constant.
3573 if (Imm % ShiftWidth == ShiftWidth - 1) {
3574 SDLoc DL(ShAmt);
3575 EVT VT = ShAmt.getValueType();
3576 MachineSDNode *Not = CurDAG->getMachineNode(
3577 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3578 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3579 ShAmt = SDValue(Not, 0);
3580 return true;
3581 }
3582 }
3583
3584 return true;
3585}
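A standalone sketch (plain C++, not part of this file) of why the rewrites above are safe: hardware shifts read the amount modulo the shift width, so adding a multiple of the width is a no-op, subtracting from a multiple of the width is a NEG, and subtracting from one less than a multiple of the width is a NOT.

#include <cassert>

int main() {
  const unsigned Width = 64;            // shift width (power of two)
  const unsigned M = Width - 1;         // only these bits of the amount are read
  for (unsigned X = 0; X < 1024; ++X) {
    assert(((X + 2 * Width) & M) == (X & M));        // X+K, K % Width == 0
    assert(((2 * Width - X) & M) == ((0u - X) & M)); // K-X -> NEG X
    assert(((2 * Width - 1 - X) & M) == (~X & M));   // K-X, K % Width == Width-1 -> NOT X
  }
  return 0;
}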
3586
3587/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3588/// check for equality with 0. This function emits instructions that convert the
3589/// seteq/setne into something that can be compared with 0.
3590/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3591/// ISD::SETNE).
3592bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3593 SDValue &Val) {
3594 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3595 "Unexpected condition code!");
3596
3597 // We're looking for a setcc.
3598 if (N->getOpcode() != ISD::SETCC)
3599 return false;
3600
3601 // Must be an equality comparison.
3602 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3603 if (CCVal != ExpectedCCVal)
3604 return false;
3605
3606 SDValue LHS = N->getOperand(0);
3607 SDValue RHS = N->getOperand(1);
3608
3609 if (!LHS.getValueType().isScalarInteger())
3610 return false;
3611
3612 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
3613 if (isNullConstant(RHS)) {
3614 Val = LHS;
3615 return true;
3616 }
3617
3618 SDLoc DL(N);
3619
3620 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3621 int64_t CVal = C->getSExtValue();
3622 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3623 // non-zero otherwise.
3624 if (CVal == -2048) {
3625 Val = SDValue(
3626 CurDAG->getMachineNode(
3627 RISCV::XORI, DL, N->getValueType(0), LHS,
3628 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3629 0);
3630 return true;
3631 }
3632 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3633 // if the LHS is equal to the RHS and non-zero otherwise.
3634 if (isInt<12>(CVal) || CVal == 2048) {
3635 unsigned Opc = RISCV::ADDI;
3636 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3637 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3638 Opc = RISCV::ADDIW;
3639 LHS = LHS.getOperand(0);
3640 }
3641
3642 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3643 CurDAG->getSignedTargetConstant(
3644 -CVal, DL, N->getValueType(0))),
3645 0);
3646 return true;
3647 }
3648 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3649 Val = SDValue(
3650 CurDAG->getMachineNode(
3651 RISCV::BINVI, DL, N->getValueType(0), LHS,
3652 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3653 0);
3654 return true;
3655 }
3656 // Same as the addi case above but for larger immediates (signed 26-bit) use
3657 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3658 // anything which can be done with a single lui as it might be compressible.
3659 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3660 (CVal & 0xFFF) != 0) {
3661 Val = SDValue(
3662 CurDAG->getMachineNode(
3663 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3664 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3665 0);
3666 return true;
3667 }
3668 }
3669
3670 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3671 // equal and a non-zero value if they aren't.
3672 Val = SDValue(
3673 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3674 return true;
3675}
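A standalone sketch (plain C++, not part of this file) of the equality rewrites above: each form produces zero exactly when the two operands are equal, which is what the compare-with-zero consumers need.

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t X = -4200; X <= 4200; ++X) {
    for (int64_t C : {-2047LL, -1LL, 5LL, 2047LL, 2048LL})  // addi/addiw with -C
      assert(((X + -C) == 0) == (X == C));
    assert(((X ^ -2048) == 0) == (X == -2048));  // xori for C == -2048
    assert(((X ^ 64) == 0) == (X == 64));        // binvi for a power-of-two C
    assert(((X ^ 12345) == 0) == (X == 12345));  // generic xor fallback
  }
  return 0;
}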
3676
3677bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3678 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3679 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3680 Val = N.getOperand(0);
3681 return true;
3682 }
3683
3684 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3685 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3686 return N;
3687
3688 SDValue N0 = N.getOperand(0);
3689 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3690 N.getConstantOperandVal(1) == ShiftAmt &&
3691 N0.getConstantOperandVal(1) == ShiftAmt)
3692 return N0.getOperand(0);
3693
3694 return N;
3695 };
3696
3697 MVT VT = N.getSimpleValueType();
3698 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3699 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3700 return true;
3701 }
3702
3703 return false;
3704}
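A standalone sketch (plain C++, not part of this file) of the shl/sra pair handled above: shifting left and then arithmetically right by the same amount is a sign-extension from the remaining bits, so the pair is redundant for a value that already has that many sign bits.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Bits = 32;            // want a value sign-extended from 32 bits
  const unsigned K = 64 - Bits;        // the matched shl/sra amount
  for (int64_t X : {0LL, -1LL, -123456LL, 2147483647LL}) {
    int64_t Pair = (int64_t)((uint64_t)X << K) >> K;  // (sra (shl X, K), K)
    assert(Pair == X);                 // the pair can be looked through
  }
  return 0;
}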
3705
3706bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3707 if (N.getOpcode() == ISD::AND) {
3708 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3709 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3710 Val = N.getOperand(0);
3711 return true;
3712 }
3713 }
3714 MVT VT = N.getSimpleValueType();
3715 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3716 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3717 Val = N;
3718 return true;
3719 }
3720
3721 return false;
3722}
3723
3724/// Look for various patterns that can be done with a SHL that can be folded
3725/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3726/// SHXADD we are trying to match.
3727bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3728 SDValue &Val) {
3729 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3730 SDValue N0 = N.getOperand(0);
3731
3732 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3733 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3734 isa<ConstantSDNode>(N0.getOperand(1))) {
3735 uint64_t Mask = N.getConstantOperandVal(1);
3736 unsigned C2 = N0.getConstantOperandVal(1);
3737
3738 unsigned XLen = Subtarget->getXLen();
3739 if (LeftShift)
3740 Mask &= maskTrailingZeros<uint64_t>(C2);
3741 else
3742 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3743
3744 if (isShiftedMask_64(Mask)) {
3745 unsigned Leading = XLen - llvm::bit_width(Mask);
3746 unsigned Trailing = llvm::countr_zero(Mask);
3747 if (Trailing != ShAmt)
3748 return false;
3749
3750 unsigned Opcode;
3751 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3752 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3753 // followed by a SHXADD with c3 for the X amount.
3754 if (LeftShift && Leading == 0 && C2 < Trailing)
3755 Opcode = RISCV::SRLI;
3756 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3757 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3758 // followed by a SHXADD with c3 for the X amount.
3759 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3760 Opcode = RISCV::SRLIW;
3761 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3762 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3763 // followed by a SHXADD using c3 for the X amount.
3764 else if (!LeftShift && Leading == C2)
3765 Opcode = RISCV::SRLI;
3766 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3767 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3768 // followed by a SHXADD using c3 for the X amount.
3769 else if (!LeftShift && Leading == 32 + C2)
3770 Opcode = RISCV::SRLIW;
3771 else
3772 return false;
3773
3774 SDLoc DL(N);
3775 EVT VT = N.getValueType();
3776 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3777 Val = SDValue(
3778 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
3779 CurDAG->getTargetConstant(ShAmt, DL, VT)),
3780 0);
3781 return true;
3782 }
3783 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3784 isa<ConstantSDNode>(N0.getOperand(1))) {
3785 uint64_t Mask = N.getConstantOperandVal(1);
3786 unsigned C2 = N0.getConstantOperandVal(1);
3787
3788 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3789 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3790 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3791 // the X amount.
3792 if (isShiftedMask_64(Mask)) {
3793 unsigned XLen = Subtarget->getXLen();
3794 unsigned Leading = XLen - llvm::bit_width(Mask);
3795 unsigned Trailing = llvm::countr_zero(Mask);
3796 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3797 SDLoc DL(N);
3798 EVT VT = N.getValueType();
3799 Val = SDValue(CurDAG->getMachineNode(
3800 RISCV::SRAI, DL, VT, N0.getOperand(0),
3801 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3802 0);
3803 Val = SDValue(CurDAG->getMachineNode(
3804 RISCV::SRLI, DL, VT, Val,
3805 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3806 0);
3807 return true;
3808 }
3809 }
3810 }
3811 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3812 (LeftShift || N.getOpcode() == ISD::SRL) &&
3813 isa<ConstantSDNode>(N.getOperand(1))) {
3814 SDValue N0 = N.getOperand(0);
3815 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3816 isa<ConstantSDNode>(N0.getOperand(1))) {
3817 uint64_t Mask = N0.getConstantOperandVal(1);
3818 if (isShiftedMask_64(Mask)) {
3819 unsigned C1 = N.getConstantOperandVal(1);
3820 unsigned XLen = Subtarget->getXLen();
3821 unsigned Leading = XLen - llvm::bit_width(Mask);
3822 unsigned Trailing = llvm::countr_zero(Mask);
3823 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3824 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3825 if (LeftShift && Leading == 32 && Trailing > 0 &&
3826 (Trailing + C1) == ShAmt) {
3827 SDLoc DL(N);
3828 EVT VT = N.getValueType();
3829 Val = SDValue(CurDAG->getMachineNode(
3830 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3831 CurDAG->getTargetConstant(Trailing, DL, VT)),
3832 0);
3833 return true;
3834 }
3835 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3836 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3837 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3838 (Trailing - C1) == ShAmt) {
3839 SDLoc DL(N);
3840 EVT VT = N.getValueType();
3841 Val = SDValue(CurDAG->getMachineNode(
3842 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3843 CurDAG->getTargetConstant(Trailing, DL, VT)),
3844 0);
3845 return true;
3846 }
3847 }
3848 }
3849 }
3850
3851 return false;
3852}
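A standalone worked example (plain C++, not part of this file) for the first pattern above: with c1 a shifted mask that has no leading zeros and c3 == ShAmt trailing zeros, and c2 < c3, the AND/SHL pair can be rebuilt as an SRLI whose result feeds a shNadd.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned ShAmt = 3;                  // matching SH3ADD
  const unsigned C2 = 1;                     // original left-shift amount
  const uint64_t C1 = ~0ULL << ShAmt;        // shifted mask: no leading zeros,
                                             // ShAmt (== c3) trailing zeros
  for (uint64_t Y : {0x0ULL, 0x1ULL, 0xdeadbeefULL, ~0ULL}) {
    uint64_t Original = (Y << C2) & C1;      // (and (shl Y, c2), c1)
    uint64_t Srli = Y >> (ShAmt - C2);       // the SRLI by c3-c2 emitted above
    assert(Original == (Srli << ShAmt));     // operand is now (something << c3)
    uint64_t Base = 0x1000;                  // sh3add rd, Srli, Base
    assert(Base + Original == Base + (Srli << ShAmt));
  }
  return 0;
}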
3853
3854/// Look for various patterns that can be done with a SHL that can be folded
3855/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3856/// SHXADD_UW we are trying to match.
3857bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3858 SDValue &Val) {
3859 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3860 N.hasOneUse()) {
3861 SDValue N0 = N.getOperand(0);
3862 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3863 N0.hasOneUse()) {
3864 uint64_t Mask = N.getConstantOperandVal(1);
3865 unsigned C2 = N0.getConstantOperandVal(1);
3866
3867 Mask &= maskTrailingZeros<uint64_t>(C2);
3868
3869 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3870 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3871 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3872 if (isShiftedMask_64(Mask)) {
3873 unsigned Leading = llvm::countl_zero(Mask);
3874 unsigned Trailing = llvm::countr_zero(Mask);
3875 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3876 SDLoc DL(N);
3877 EVT VT = N.getValueType();
3878 Val = SDValue(CurDAG->getMachineNode(
3879 RISCV::SLLI, DL, VT, N0.getOperand(0),
3880 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3881 0);
3882 return true;
3883 }
3884 }
3885 }
3886 }
3887
3888 return false;
3889}
3890
3891bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
3892 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
3893 if (N->getFlags().hasDisjoint())
3894 return true;
3895 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
3896}
3897
3898bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3899 SDValue N, SDValue &Val) {
3900 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
3901 /*CompressionCost=*/true);
3902 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3903 /*CompressionCost=*/true);
3904 if (OrigCost <= Cost)
3905 return false;
3906
3907 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
3908 return true;
3909}
3910
3911bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3912 if (!isa<ConstantSDNode>(N))
3913 return false;
3914 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3915 if ((Imm >> 31) != 1)
3916 return false;
3917
3918 for (const SDNode *U : N->users()) {
3919 switch (U->getOpcode()) {
3920 case ISD::ADD:
3921 break;
3922 case ISD::OR:
3923 if (orDisjoint(U))
3924 break;
3925 return false;
3926 default:
3927 return false;
3928 }
3929 }
3930
3931 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
3932}
3933
3934bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3935 if (!isa<ConstantSDNode>(N))
3936 return false;
3937 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3938 if (isInt<32>(Imm))
3939 return false;
3940
3941 for (const SDNode *U : N->users()) {
3942 switch (U->getOpcode()) {
3943 case ISD::ADD:
3944 break;
3945 case RISCVISD::VMV_V_X_VL:
3946 if (!all_of(U->users(), [](const SDNode *V) {
3947 return V->getOpcode() == ISD::ADD ||
3948 V->getOpcode() == RISCVISD::ADD_VL;
3949 }))
3950 return false;
3951 break;
3952 default:
3953 return false;
3954 }
3955 }
3956
3957 return selectImm64IfCheaper(-Imm, Imm, N, Val);
3958}
3959
3960bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3961 if (!isa<ConstantSDNode>(N))
3962 return false;
3963 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3964
3965 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3966 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3967 return false;
3968
3969 // Abandon this transform if the constant is needed elsewhere.
3970 for (const SDNode *U : N->users()) {
3971 switch (U->getOpcode()) {
3972 case ISD::AND:
3973 case ISD::OR:
3974 case ISD::XOR:
3975 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3976 return false;
3977 break;
3978 case RISCVISD::VMV_V_X_VL:
3979 if (!Subtarget->hasStdExtZvkb())
3980 return false;
3981 if (!all_of(U->users(), [](const SDNode *V) {
3982 return V->getOpcode() == ISD::AND ||
3983 V->getOpcode() == RISCVISD::AND_VL;
3984 }))
3985 return false;
3986 break;
3987 default:
3988 return false;
3989 }
3990 }
3991
3992 if (isInt<32>(Imm)) {
3993 Val =
3994 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3995 return true;
3996 }
3997
3998 // For 64-bit constants, the instruction sequences get complex,
3999 // so we select inverted only if it's cheaper.
4000 return selectImm64IfCheaper(~Imm, Imm, N, Val);
4001}
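A standalone sketch (plain C++, not part of this file) of the inverted-constant idea above: with Zbb's ANDN (x & ~y), using ~C as the materialized constant still computes x & C, and when C has all-ones low 12 bits, ~C has zero low 12 bits and can be built with a single LUI instead of LUI+ADDI.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t C = 0x12345fffULL;          // low 12 bits all ones: needs LUI+ADDI
  uint64_t NotC = ~C;                  // 0xffffffffedcba000: low 12 bits zero,
                                       // a single (sign-extended) LUI on RV64
  for (uint64_t X : {0x0ULL, 0xdeadbeefULL, ~0ULL}) {
    uint64_t Andn = X & ~NotC;         // andn X, NotC
    assert(Andn == (X & C));           // same result as and X, C
  }
  return 0;
}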
4002
4003static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4004 unsigned Bits,
4005 const TargetInstrInfo *TII) {
4006 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
4007
4008 if (!MCOpcode)
4009 return false;
4010
4011 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
4012 const uint64_t TSFlags = MCID.TSFlags;
4013 if (!RISCVII::hasSEWOp(TSFlags))
4014 return false;
4015 assert(RISCVII::hasVLOp(TSFlags));
4016
4017 unsigned ChainOpIdx = User->getNumOperands() - 1;
4018 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
4019 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4020 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4021 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4022
4023 if (UserOpNo == VLIdx)
4024 return false;
4025
4026 auto NumDemandedBits =
4027 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4028 return NumDemandedBits && Bits >= *NumDemandedBits;
4029}
4030
4031// Return true if all users of this SDNode* only consume the lower \p Bits.
4032// This can be used to form W instructions for add/sub/mul/shl even when the
4033// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4034// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4035// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4036// the add/sub/mul/shl to become non-W instructions. By checking the users we
4037// may be able to use a W instruction and CSE with the other instruction if
4038// this has happened. We could try to detect that the CSE opportunity exists
4039// before doing this, but that would be more complicated.
4040bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
4041 const unsigned Depth) const {
4042 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4043 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4044 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4045 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4046 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4047 isa<ConstantSDNode>(Node) || Depth != 0) &&
4048 "Unexpected opcode");
4049
4050 if (Depth >= SelectionDAG::MaxRecursionDepth)
4051 return false;
4052
4053 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4054 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4055 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4056 return false;
4057
4058 for (SDUse &Use : Node->uses()) {
4059 SDNode *User = Use.getUser();
4060 // Users of this node should have already been instruction selected
4061 if (!User->isMachineOpcode())
4062 return false;
4063
4064 // TODO: Add more opcodes?
4065 switch (User->getMachineOpcode()) {
4066 default:
4067 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
4068 break;
4069 return false;
4070 case RISCV::ADDW:
4071 case RISCV::ADDIW:
4072 case RISCV::SUBW:
4073 case RISCV::MULW:
4074 case RISCV::SLLW:
4075 case RISCV::SLLIW:
4076 case RISCV::SRAW:
4077 case RISCV::SRAIW:
4078 case RISCV::SRLW:
4079 case RISCV::SRLIW:
4080 case RISCV::DIVW:
4081 case RISCV::DIVUW:
4082 case RISCV::REMW:
4083 case RISCV::REMUW:
4084 case RISCV::ROLW:
4085 case RISCV::RORW:
4086 case RISCV::RORIW:
4087 case RISCV::CLZW:
4088 case RISCV::CTZW:
4089 case RISCV::CPOPW:
4090 case RISCV::SLLI_UW:
4091 case RISCV::ABSW:
4092 case RISCV::FMV_W_X:
4093 case RISCV::FCVT_H_W:
4094 case RISCV::FCVT_H_W_INX:
4095 case RISCV::FCVT_H_WU:
4096 case RISCV::FCVT_H_WU_INX:
4097 case RISCV::FCVT_S_W:
4098 case RISCV::FCVT_S_W_INX:
4099 case RISCV::FCVT_S_WU:
4100 case RISCV::FCVT_S_WU_INX:
4101 case RISCV::FCVT_D_W:
4102 case RISCV::FCVT_D_W_INX:
4103 case RISCV::FCVT_D_WU:
4104 case RISCV::FCVT_D_WU_INX:
4105 case RISCV::TH_REVW:
4106 case RISCV::TH_SRRIW:
4107 if (Bits >= 32)
4108 break;
4109 return false;
4110 case RISCV::SLL:
4111 case RISCV::SRA:
4112 case RISCV::SRL:
4113 case RISCV::ROL:
4114 case RISCV::ROR:
4115 case RISCV::BSET:
4116 case RISCV::BCLR:
4117 case RISCV::BINV:
4118 // Shift amount operands only use log2(Xlen) bits.
4119 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4120 break;
4121 return false;
4122 case RISCV::SLLI:
4123 // SLLI only uses the lower (XLen - ShAmt) bits.
4124 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4125 break;
4126 return false;
4127 case RISCV::ANDI:
4128 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4129 break;
4130 goto RecCheck;
4131 case RISCV::ORI: {
4132 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4133 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4134 break;
4135 [[fallthrough]];
4136 }
4137 case RISCV::AND:
4138 case RISCV::OR:
4139 case RISCV::XOR:
4140 case RISCV::XORI:
4141 case RISCV::ANDN:
4142 case RISCV::ORN:
4143 case RISCV::XNOR:
4144 case RISCV::SH1ADD:
4145 case RISCV::SH2ADD:
4146 case RISCV::SH3ADD:
4147 RecCheck:
4148 if (hasAllNBitUsers(User, Bits, Depth + 1))
4149 break;
4150 return false;
4151 case RISCV::SRLI: {
4152 unsigned ShAmt = User->getConstantOperandVal(1);
4153 // If we are shifting right by less than Bits, and users don't demand any
4154 // bits that were shifted into [Bits-1:0], then we can consider this as an
4155 // N-Bit user.
4156 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4157 break;
4158 return false;
4159 }
4160 case RISCV::SEXT_B:
4161 case RISCV::PACKH:
4162 if (Bits >= 8)
4163 break;
4164 return false;
4165 case RISCV::SEXT_H:
4166 case RISCV::FMV_H_X:
4167 case RISCV::ZEXT_H_RV32:
4168 case RISCV::ZEXT_H_RV64:
4169 case RISCV::PACKW:
4170 if (Bits >= 16)
4171 break;
4172 return false;
4173 case RISCV::PACK:
4174 if (Bits >= (Subtarget->getXLen() / 2))
4175 break;
4176 return false;
4177 case RISCV::ADD_UW:
4178 case RISCV::SH1ADD_UW:
4179 case RISCV::SH2ADD_UW:
4180 case RISCV::SH3ADD_UW:
4181 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4182 // 32 bits.
4183 if (Use.getOperandNo() == 0 && Bits >= 32)
4184 break;
4185 return false;
4186 case RISCV::SB:
4187 if (Use.getOperandNo() == 0 && Bits >= 8)
4188 break;
4189 return false;
4190 case RISCV::SH:
4191 if (Use.getOperandNo() == 0 && Bits >= 16)
4192 break;
4193 return false;
4194 case RISCV::SW:
4195 if (Use.getOperandNo() == 0 && Bits >= 32)
4196 break;
4197 return false;
4198 case RISCV::TH_EXT:
4199 case RISCV::TH_EXTU: {
4200 unsigned Msb = User->getConstantOperandVal(1);
4201 unsigned Lsb = User->getConstantOperandVal(2);
4202 // Behavior of Msb < Lsb is not well documented.
4203 if (Msb >= Lsb && Bits > Msb)
4204 break;
4205 return false;
4206 }
4207 }
4208 }
4209
4210 return true;
4211}
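A standalone sketch (plain C++, not part of this file) of why the check above lets an ADD become ADDW: ADDW only sign-extends the low 32 bits of the sum, so any user that reads no more than the low 32 bits cannot tell the two apart.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t A = 0x123456789abcdef0ULL, B = 0x0fedcba987654321ULL;
  uint64_t Add = A + B;                                 // ADD
  uint64_t AddW =
      (uint64_t)(int64_t)(int32_t)(uint32_t)(A + B);    // ADDW
  assert((uint32_t)Add == (uint32_t)AddW);  // identical for a 32-bit-only user
  return 0;
}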
4212
4213// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4214bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4215 SDValue &Shl2) {
4216 auto *C = dyn_cast<ConstantSDNode>(N);
4217 if (!C)
4218 return false;
4219
4220 int64_t Offset = C->getSExtValue();
4221 for (unsigned Shift = 0; Shift < 4; Shift++) {
4222 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4223 EVT VT = N->getValueType(0);
4224 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4225 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4226 return true;
4227 }
4228 }
4229
4230 return false;
4231}
4232
4233// Select VL as a 5 bit immediate or a value that will become a register. This
4234// allows us to choose between VSETIVLI or VSETVLI later.
4235bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4236 auto *C = dyn_cast<ConstantSDNode>(N);
4237 if (C && isUInt<5>(C->getZExtValue())) {
4238 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4239 N->getValueType(0));
4240 } else if (C && C->isAllOnes()) {
4241 // Treat all ones as VLMax.
4242 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4243 N->getValueType(0));
4244 } else if (isa<RegisterSDNode>(N) &&
4245 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4246 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4247 // as the register class. Convert X0 to a special immediate to pass the
4248 // MachineVerifier. This is recognized specially by the vsetvli insertion
4249 // pass.
4250 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4251 N->getValueType(0));
4252 } else {
4253 VL = N;
4254 }
4255
4256 return true;
4257}
4258
4259static SDValue findVSplat(SDValue N) {
4260 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4261 if (!N.getOperand(0).isUndef())
4262 return SDValue();
4263 N = N.getOperand(1);
4264 }
4265 SDValue Splat = N;
4266 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4267 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4268 !Splat.getOperand(0).isUndef())
4269 return SDValue();
4270 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4271 return Splat;
4272}
4273
4274bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4275 SDValue Splat = findVSplat(N);
4276 if (!Splat)
4277 return false;
4278
4279 SplatVal = Splat.getOperand(1);
4280 return true;
4281}
4282
4283static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4284 SelectionDAG &DAG,
4285 const RISCVSubtarget &Subtarget,
4286 std::function<bool(int64_t)> ValidateImm,
4287 bool Decrement = false) {
4288 SDValue Splat = findVSplat(N);
4289 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4290 return false;
4291
4292 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4293 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4294 "Unexpected splat operand type");
4295
4296 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4297 // type is wider than the resulting vector element type: an implicit
4298 // truncation first takes place. Therefore, perform a manual
4299 // truncation/sign-extension in order to ignore any truncated bits and catch
4300 // any zero-extended immediate.
4301 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4302 // sign-extending to (XLenVT -1).
4303 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4304
4305 int64_t SplatImm = SplatConst.getSExtValue();
4306
4307 if (!ValidateImm(SplatImm))
4308 return false;
4309
4310 if (Decrement)
4311 SplatImm -= 1;
4312
4313 SplatVal =
4314 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4315 return true;
4316}
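A standalone sketch (plain C++, not part of this file) of the truncate-then-sign-extend step above, using the (i8 -1) example from the comment:

#include <cassert>
#include <cstdint>

int main() {
  int64_t XLenOperand = 255;   // (i8 -1) appears as the XLEN constant 255
  // sextOrTrunc to the 8-bit element width recovers the value actually splatted.
  int64_t SplatImm = (int64_t)(int8_t)(uint8_t)XLenOperand;
  assert(SplatImm == -1);      // now passes the isInt<5>() simm5 check
  return 0;
}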
4317
4318bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4319 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4320 [](int64_t Imm) { return isInt<5>(Imm); });
4321}
4322
4323bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4324 return selectVSplatImmHelper(
4325 N, SplatVal, *CurDAG, *Subtarget,
4326 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4327 /*Decrement=*/true);
4328}
4329bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N,
4330 SDValue &SplatVal) {
4331 return selectVSplatImmHelper(
4332 N, SplatVal, *CurDAG, *Subtarget,
4333 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4334 /*Decrement=*/false);
4335}
4336
4337bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4338 SDValue &SplatVal) {
4339 return selectVSplatImmHelper(
4340 N, SplatVal, *CurDAG, *Subtarget,
4341 [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4342 /*Decrement=*/true);
4343}
4344
4345bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4346 SDValue &SplatVal) {
4347 return selectVSplatImmHelper(
4348 N, SplatVal, *CurDAG, *Subtarget,
4349 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4350}
4351
4352bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4353 SDValue Splat = findVSplat(N);
4354 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4355}
4356
4357bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4358 auto IsExtOrTrunc = [](SDValue N) {
4359 switch (N->getOpcode()) {
4360 case ISD::SIGN_EXTEND:
4361 case ISD::ZERO_EXTEND:
4362 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4363 // inactive elements will be undef.
4364 case RISCVISD::TRUNCATE_VECTOR_VL:
4365 case RISCVISD::VSEXT_VL:
4366 case RISCVISD::VZEXT_VL:
4367 return true;
4368 default:
4369 return false;
4370 }
4371 };
4372
4373 // We can have multiple nested nodes, so unravel them all if needed.
4374 while (IsExtOrTrunc(N)) {
4375 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4376 return false;
4377 N = N->getOperand(0);
4378 }
4379
4380 return selectVSplat(N, SplatVal);
4381}
4382
4383bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4384 // Allow bitcasts from XLenVT -> FP.
4385 if (N.getOpcode() == ISD::BITCAST &&
4386 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4387 Imm = N.getOperand(0);
4388 return true;
4389 }
4390 // Allow moves from XLenVT to FP.
4391 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4392 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4393 Imm = N.getOperand(0);
4394 return true;
4395 }
4396
4397 // Otherwise, look for FP constants that can be materialized with scalar int.
4398 auto *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
4399 if (!CFP)
4400 return false;
4401 const APFloat &APF = CFP->getValueAPF();
4402 // td can handle +0.0 already.
4403 if (APF.isPosZero())
4404 return false;
4405
4406 MVT VT = CFP->getSimpleValueType(0);
4407
4408 MVT XLenVT = Subtarget->getXLenVT();
4409 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4410 assert(APF.isNegZero() && "Unexpected constant.");
4411 return false;
4412 }
4413 SDLoc DL(N);
4414 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4415 *Subtarget);
4416 return true;
4417}
4418
4419bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4420 SDValue &Imm) {
4421 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4422 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4423
4424 if (!isInt<5>(ImmVal))
4425 return false;
4426
4427 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4428 Subtarget->getXLenVT());
4429 return true;
4430 }
4431
4432 return false;
4433}
4434
4435// Try to remove sext.w if the input is a W instruction or can be made into
4436// a W instruction cheaply.
4437bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4438 // Look for the sext.w pattern, addiw rd, rs1, 0.
4439 if (N->getMachineOpcode() != RISCV::ADDIW ||
4440 !isNullConstant(N->getOperand(1)))
4441 return false;
4442
4443 SDValue N0 = N->getOperand(0);
4444 if (!N0.isMachineOpcode())
4445 return false;
4446
4447 switch (N0.getMachineOpcode()) {
4448 default:
4449 break;
4450 case RISCV::ADD:
4451 case RISCV::ADDI:
4452 case RISCV::SUB:
4453 case RISCV::MUL:
4454 case RISCV::SLLI: {
4455 // Convert sext.w+add/sub/mul to their W instructions. This will create
4456 // a new independent instruction. This improves latency.
4457 unsigned Opc;
4458 switch (N0.getMachineOpcode()) {
4459 default:
4460 llvm_unreachable("Unexpected opcode!");
4461 case RISCV::ADD: Opc = RISCV::ADDW; break;
4462 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4463 case RISCV::SUB: Opc = RISCV::SUBW; break;
4464 case RISCV::MUL: Opc = RISCV::MULW; break;
4465 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4466 }
4467
4468 SDValue N00 = N0.getOperand(0);
4469 SDValue N01 = N0.getOperand(1);
4470
4471 // Shift amount needs to be uimm5.
4472 if (N0.getMachineOpcode() == RISCV::SLLI &&
4473 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4474 break;
4475
4476 SDNode *Result =
4477 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4478 N00, N01);
4479 ReplaceUses(N, Result);
4480 return true;
4481 }
4482 case RISCV::ADDW:
4483 case RISCV::ADDIW:
4484 case RISCV::SUBW:
4485 case RISCV::MULW:
4486 case RISCV::SLLIW:
4487 case RISCV::PACKW:
4488 case RISCV::TH_MULAW:
4489 case RISCV::TH_MULAH:
4490 case RISCV::TH_MULSW:
4491 case RISCV::TH_MULSH:
4492 if (N0.getValueType() == MVT::i32)
4493 break;
4494
4495 // Result is already sign extended just remove the sext.w.
4496 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4497 ReplaceUses(N, N0.getNode());
4498 return true;
4499 }
4500
4501 return false;
4502}
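A standalone sketch (plain C++, not part of this file) of the add case above: sext.w of an ADD result equals ADDW of the same operands, so the pair can be replaced with one W instruction.

#include <cassert>
#include <cstdint>

static int64_t SextW(uint64_t V) { return (int32_t)(uint32_t)V; } // addiw rd, rs, 0

int main() {
  uint64_t A = 0xffffffff80000001ULL, B = 0x00000000fffffffeULL;
  int64_t SextOfAdd = SextW(A + B);                      // ADD, then sext.w
  int64_t AddW = (int32_t)((uint32_t)A + (uint32_t)B);   // ADDW
  assert(SextOfAdd == AddW);
  return 0;
}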
4503
4504static bool usesAllOnesMask(SDValue MaskOp) {
4505 const auto IsVMSet = [](unsigned Opc) {
4506 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4507 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4508 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4509 Opc == RISCV::PseudoVMSET_M_B8;
4510 };
4511
4512 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4513 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4514 // assume that it's all-ones? Same applies to its VL.
4515 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4516}
4517
4518static bool isImplicitDef(SDValue V) {
4519 if (!V.isMachineOpcode())
4520 return false;
4521 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4522 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4523 if (!isImplicitDef(V.getOperand(I)))
4524 return false;
4525 return true;
4526 }
4527 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4528}
4529
4530// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4531// corresponding "unmasked" pseudo versions.
4532bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4533 const RISCV::RISCVMaskedPseudoInfo *I =
4534 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4535 if (!I)
4536 return false;
4537
4538 unsigned MaskOpIdx = I->MaskOpIdx;
4539 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4540 return false;
4541
4542 // There are two classes of pseudos in the table - compares and
4543 // everything else. See the comment on RISCVMaskedPseudo for details.
4544 const unsigned Opc = I->UnmaskedPseudo;
4545 const MCInstrDesc &MCID = TII->get(Opc);
4546 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4547
4548 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4549 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4550
4551 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4552 !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
4553 "Unmasked pseudo has policy but masked pseudo doesn't?");
4554 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4555 "Unexpected pseudo structure");
4556 assert(!(HasPassthru && !MaskedHasPassthru) &&
4557 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4558
4559 SmallVector<SDValue, 8> Ops;
4560 // Skip the passthru operand at index 0 if the unmasked don't have one.
4561 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4562 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4563 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4564 bool HasChainOp =
4565 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4566 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4567 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4568 // Skip the mask
4569 SDValue Op = N->getOperand(I);
4570 if (I == MaskOpIdx)
4571 continue;
4572 if (DropPolicy && I == LastOpNum)
4573 continue;
4574 Ops.push_back(Op);
4575 }
4576
4577 MachineSDNode *Result =
4578 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4579
4580 if (!N->memoperands_empty())
4581 CurDAG->setNodeMemRefs(Result, N->memoperands());
4582
4583 Result->setFlags(N->getFlags());
4584 ReplaceUses(N, Result);
4585
4586 return true;
4587}
4588
4589/// If our passthru is an implicit_def, use noreg instead. This side
4590/// steps issues with MachineCSE not being able to CSE expressions with
4591/// IMPLICIT_DEF operands while preserving the semantic intent. See
4592/// pr64282 for context. Note that this transform is the last one
4593/// performed at ISEL DAG to DAG.
4594bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4595 bool MadeChange = false;
4596 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4597
4598 while (Position != CurDAG->allnodes_begin()) {
4599 SDNode *N = &*--Position;
4600 if (N->use_empty() || !N->isMachineOpcode())
4601 continue;
4602
4603 const unsigned Opc = N->getMachineOpcode();
4604 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4605 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4606 !isImplicitDef(N->getOperand(0)))
4607 continue;
4608
4609 SmallVector<SDValue, 8> Ops;
4610 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4611 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4612 SDValue Op = N->getOperand(I);
4613 Ops.push_back(Op);
4614 }
4615
4616 MachineSDNode *Result =
4617 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4618 Result->setFlags(N->getFlags());
4619 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4620 ReplaceUses(N, Result);
4621 MadeChange = true;
4622 }
4623 return MadeChange;
4624}
4625
4626
4627// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4628// for instruction scheduling.
4629FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4630 CodeGenOptLevel OptLevel) {
4631 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4632}
4633
4635
4640
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this a load/store that we have a RegRegScale instruction for.
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
#define INST_ALL_NF_CASE(NAME)
static bool isApplicableToPLI(int Val)
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
bool isPosZero() const
Definition APFloat.h:1442
bool isNegZero() const
Definition APFloat.h:1443
Class for arbitrary precision integers.
Definition APInt.h:78
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:470
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
iterator_range< user_iterator > users()
Definition Value.h:426
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
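A small illustration of the predicate: only the (in)equality conditions report true.
#include "llvm/CodeGen/ISDOpcodes.h"
bool isIntEqualityDemo() {
  return llvm::ISD::isIntEqualitySetCC(llvm::ISD::SETEQ) &&  // true
         llvm::ISD::isIntEqualitySetCC(llvm::ISD::SETNE) &&  // true
         !llvm::ISD::isIntEqualitySetCC(llvm::ISD::SETULT);  // relational, so false
}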
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
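A hedged sketch of how generateInstSeq is typically consulted: it returns the sequence of instructions needed to materialize a 64-bit constant, and its length is a convenient cost measure. The include path and the helper name below are assumptions for illustration.
#include "MCTargetDesc/RISCVMatInt.h" // assumed in-tree path within the RISC-V target
#include "llvm/MC/MCSubtargetInfo.h"
unsigned countMaterializationInsts(int64_t Val, const llvm::MCSubtargetInfo &STI) {
  llvm::RISCVMatInt::InstSeq Seq = llvm::RISCVMatInt::generateInstSeq(Val, STI);
  return Seq.size(); // 1 for an ADDI/LUI-sized value, more for harder constants
}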
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
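Illustrative only: the vtype SEW field stores the element width as a power-of-two encoding, so decodeVSEW maps the field values 0..3 to 8/16/32/64 bits. The header path is an assumption.
#include "llvm/TargetParser/RISCVTargetParser.h" // assumed home of the RISCVVType helpers
unsigned sewDemo() {
  return llvm::RISCVVType::decodeVSEW(2); // 32: field value 2 means 32-bit elements
}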
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
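For example, RISC-V I-type immediates are signed 12-bit values, which this helper checks directly:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isInt<12>(2047), "largest 12-bit signed value fits");
static_assert(llvm::isInt<12>(-2048), "smallest 12-bit signed value fits");
static_assert(!llvm::isInt<12>(2048), "2048 needs a 13th bit");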
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
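Two quick examples of these bit queries:
#include <cassert>
#include "llvm/ADT/bit.h"
void bitQueryDemo() {
  assert(llvm::countr_one(0xFFu) == 8); // eight trailing one bits
  assert(llvm::bit_width(255u) == 8);   // 255 fits in eight bits
  assert(llvm::bit_width(256u) == 9);   // 256 needs a ninth bit
}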
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
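Unlike the isUInt<N> template, the width here is a runtime value:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isUIntN(5, 31), "31 fits in five unsigned bits");
static_assert(!llvm::isUIntN(5, 32), "32 does not");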
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
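These two often pair up: check for a power of two, then take its exact log, remembering that Log2_64 is a floor for non-powers:
#include <cassert>
#include "llvm/Support/MathExtras.h"
void log2Demo() {
  static_assert(llvm::isPowerOf2_64(4096), "exactly one bit set");
  assert(llvm::Log2_64(4096) == 12);
  assert(llvm::Log2_64(4097) == 12); // floor, not rounding
}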
int countr_zero(T Val)
Count the number of 0's from the least significant bit upward, stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
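A typical use in isel is to recognize a contiguous run of ones and recover its shift amount:
#include <cassert>
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
void shiftedMaskDemo() {
  static_assert(llvm::isShiftedMask_64(0x0F00), "ones at bits 8..11");
  static_assert(!llvm::isShiftedMask_64(0x0F01), "run is not contiguous");
  assert(llvm::countr_zero(0x0F00u) == 8); // shift amount of the run
}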
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count the number of 0's from the most significant bit downward, stopping at the first 1.
Definition bit.h:236
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
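For example:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isMask_64(0x00FF), "ones starting at bit 0");
static_assert(!llvm::isMask_64(0x0F00), "shifted runs are not low masks");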
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
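One use is clearing the low bits of a value, i.e. rounding it down to an alignment boundary:
#include <cstdint>
#include "llvm/Support/MathExtras.h"
static_assert(llvm::maskTrailingZeros<uint64_t>(4) == 0xFFFFFFFFFFFFFFF0ULL,
              "low four bits cleared");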
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
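A quick example of the alignment that can still be assumed after adding an offset to an aligned base:
#include <cassert>
#include "llvm/Support/Alignment.h"
void alignmentDemo() {
  assert(llvm::commonAlignment(llvm::Align(8), 4) == llvm::Align(4));
  assert(llvm::commonAlignment(llvm::Align(8), 16) == llvm::Align(8));
}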
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
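Two more small examples, using the 12-bit immediate width that shows up throughout this selector:
#include <cstdint>
#include "llvm/Support/MathExtras.h"
static_assert(llvm::SignExtend64<12>(0xFFF) == -1, "all-ones 12-bit value is -1");
static_assert(llvm::SignExtend64<12>(0x7FF) == 2047, "positive values pass through");
static_assert(llvm::maskTrailingOnes<uint64_t>(4) == 0xF, "low four bits set");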
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.