LLVM 22.0.0git
RISCVISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 if (Subtarget->enablePExtCodeGen())
55 break;
56 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
57 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
58 MVT VT = N->getSimpleValueType(0);
59 unsigned Opc =
60 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
61 SDLoc DL(N);
62 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
63 SDValue Src = N->getOperand(0);
64 if (VT.isInteger())
65 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
66 N->getOperand(0));
67 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
68 break;
69 }
70 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
71 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
72 // load. Done after lowering and combining so that we have a chance to
73 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
74 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
75 MVT VT = N->getSimpleValueType(0);
76 SDValue Passthru = N->getOperand(0);
77 SDValue Lo = N->getOperand(1);
78 SDValue Hi = N->getOperand(2);
79 SDValue VL = N->getOperand(3);
80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
82 "Unexpected VTs!");
83 MachineFunction &MF = CurDAG->getMachineFunction();
84 SDLoc DL(N);
85
86 // Create temporary stack for each expanding node.
87 SDValue StackSlot =
88 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
89 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
91
92 SDValue Chain = CurDAG->getEntryNode();
93 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
94
95 SDValue OffsetSlot =
96 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
97 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
98 Align(8));
99
100 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
101
102 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
103 SDValue IntID =
104 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
105 SDValue Ops[] = {Chain,
106 IntID,
107 Passthru,
108 StackSlot,
109 CurDAG->getRegister(RISCV::X0, MVT::i64),
110 VL};
111
112 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
113 MVT::i64, MPI, Align(8),
115 break;
116 }
117 case ISD::FP_EXTEND: {
118 // We only have vector patterns for riscv_fpextend_vl in isel.
119 SDLoc DL(N);
120 MVT VT = N->getSimpleValueType(0);
121 if (!VT.isVector())
122 break;
123 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
124 SDValue TrueMask = CurDAG->getNode(
125 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
126 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
127 TrueMask, VLMAX);
128 break;
129 }
130 }
131
132 if (Result) {
133 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
134 LLVM_DEBUG(N->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\nNew: ");
136 LLVM_DEBUG(Result->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\n");
138
139 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
140 MadeChange = true;
141 }
142 }
143
144 if (MadeChange)
145 CurDAG->RemoveDeadNodes();
146}
147
149 HandleSDNode Dummy(CurDAG->getRoot());
150 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
151
152 bool MadeChange = false;
153 while (Position != CurDAG->allnodes_begin()) {
154 SDNode *N = &*--Position;
155 // Skip dead nodes and any non-machine opcodes.
156 if (N->use_empty() || !N->isMachineOpcode())
157 continue;
158
159 MadeChange |= doPeepholeSExtW(N);
160
161 // FIXME: This is here only because the VMerge transform doesn't
162 // know how to handle masked true inputs. Once that has been moved
163 // to post-ISEL, this can be deleted as well.
164 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 // After we're done with everything else, convert IMPLICIT_DEF
170 // passthru operands to NoRegister. This is required to workaround
171 // an optimization deficiency in MachineCSE. This really should
172 // be merged back into each of the patterns (i.e. there's no good
173 // reason not to go directly to NoReg), but is being done this way
174 // to allow easy backporting.
175 MadeChange |= doPeepholeNoRegPassThru();
176
177 if (MadeChange)
178 CurDAG->RemoveDeadNodes();
179}
180
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
183 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
187 switch (Inst.getOpndKind()) {
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
190 break;
192 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
193 CurDAG->getRegister(RISCV::X0, VT));
194 break;
196 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
197 break;
199 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211 int64_t Imm, const RISCVSubtarget &Subtarget) {
213
214 // Use a rematerializable pseudo instruction for short sequences if enabled.
215 if (Seq.size() == 2 && UsePseudoMovImm)
216 return SDValue(
217 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
218 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
219 0);
220
221 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222 // worst an LUI+ADDIW. This will require an extra register, but avoids a
223 // constant pool.
224 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225 // low and high 32 bits are the same and bit 31 and 63 are set.
226 if (Seq.size() > 3) {
227 unsigned ShiftAmt, AddOpc;
229 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
230 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
232
233 SDValue SLLI = SDValue(
234 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
235 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
236 0);
237 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
238 }
239 }
240
241 // Otherwise, use the original sequence.
242 return selectImmSeq(CurDAG, DL, VT, Seq);
243}
244
246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248 bool IsLoad, MVT *IndexVT) {
249 SDValue Chain = Node->getOperand(0);
250
251 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
252
253 if (IsStridedOrIndexed) {
254 Operands.push_back(Node->getOperand(CurOp++)); // Index.
255 if (IndexVT)
256 *IndexVT = Operands.back()->getSimpleValueType(0);
257 }
258
259 if (IsMasked) {
260 SDValue Mask = Node->getOperand(CurOp++);
261 Operands.push_back(Mask);
262 }
263 SDValue VL;
264 selectVLOp(Node->getOperand(CurOp++), VL);
265 Operands.push_back(VL);
266
267 MVT XLenVT = Subtarget->getXLenVT();
268 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
269 Operands.push_back(SEWOp);
270
271 // At the IR layer, all the masked load intrinsics have policy operands,
272 // none of the others do. All have passthru operands. For our pseudos,
273 // all loads have policy operands.
274 if (IsLoad) {
276 if (IsMasked)
277 Policy = Node->getConstantOperandVal(CurOp++);
278 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
279 Operands.push_back(PolicyOp);
280 }
281
282 Operands.push_back(Chain); // Chain.
283}
284
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
291
292 unsigned CurOp = 2;
294
295 Operands.push_back(Node->getOperand(CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302 static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
305
306 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
307
308 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
309 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
310 CurDAG->RemoveDeadNode(Node);
311}
312
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
320
321 unsigned CurOp = 2;
323
324 Operands.push_back(Node->getOperand(CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
334 XLenVT, MVT::Other, Operands);
335
336 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
337
338 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
339 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
340 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
341 CurDAG->RemoveDeadNode(Node);
342}
343
344void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
345 bool IsOrdered) {
346 SDLoc DL(Node);
347 MVT VT = Node->getSimpleValueType(0);
348 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
350
351 unsigned CurOp = 2;
353
354 Operands.push_back(Node->getOperand(CurOp++));
355
356 MVT IndexVT;
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
358 /*IsStridedOrIndexed*/ true, Operands,
359 /*IsLoad=*/true, &IndexVT);
360
361#ifndef NDEBUG
362 // Number of element = RVVBitsPerBlock * LMUL / SEW
363 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
364 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
365 if (DecodedLMUL.second)
366 ContainedTyNumElts /= DecodedLMUL.first;
367 else
368 ContainedTyNumElts *= DecodedLMUL.first;
369 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
370 "Element count mismatch");
371#endif
372
374 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
375 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
376 reportFatalUsageError("The V extension does not support EEW=64 for index "
377 "values when XLEN=32");
378 }
379 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
380 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
381 static_cast<unsigned>(IndexLMUL));
382 MachineSDNode *Load =
383 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
384
385 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
386
387 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
388 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
389 CurDAG->RemoveDeadNode(Node);
390}
391
392void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
393 bool IsStrided) {
394 SDLoc DL(Node);
395 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
396 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
398
399 unsigned CurOp = 2;
401
402 Operands.push_back(Node->getOperand(CurOp++));
403
404 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
405 Operands);
406
407 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
408 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
409 MachineSDNode *Store =
410 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
411
412 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
413
414 ReplaceNode(Node, Store);
415}
416
417void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
418 bool IsOrdered) {
419 SDLoc DL(Node);
420 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
421 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
423
424 unsigned CurOp = 2;
426
427 Operands.push_back(Node->getOperand(CurOp++));
428
429 MVT IndexVT;
430 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
431 /*IsStridedOrIndexed*/ true, Operands,
432 /*IsLoad=*/false, &IndexVT);
433
434#ifndef NDEBUG
435 // Number of element = RVVBitsPerBlock * LMUL / SEW
436 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
437 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
438 if (DecodedLMUL.second)
439 ContainedTyNumElts /= DecodedLMUL.first;
440 else
441 ContainedTyNumElts *= DecodedLMUL.first;
442 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
443 "Element count mismatch");
444#endif
445
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 reportFatalUsageError("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Store =
456 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
457
458 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
459
460 ReplaceNode(Node, Store);
461}
462
464 if (!Subtarget->hasVInstructions())
465 return;
466
467 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
468
469 SDLoc DL(Node);
470 MVT XLenVT = Subtarget->getXLenVT();
471
472 unsigned IntNo = Node->getConstantOperandVal(0);
473
474 assert((IntNo == Intrinsic::riscv_vsetvli ||
475 IntNo == Intrinsic::riscv_vsetvlimax) &&
476 "Unexpected vsetvli intrinsic");
477
478 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
479 unsigned Offset = (VLMax ? 1 : 2);
480
481 assert(Node->getNumOperands() == Offset + 2 &&
482 "Unexpected number of operands");
483
484 unsigned SEW =
485 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
486 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
487 Node->getConstantOperandVal(Offset + 1) & 0x7);
488
489 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
490 /*MaskAgnostic*/ true);
491 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
492
493 SDValue VLOperand;
494 unsigned Opcode = RISCV::PseudoVSETVLI;
495 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
496 if (auto VLEN = Subtarget->getRealVLen())
497 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
498 VLMax = true;
499 }
500 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
501 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
502 Opcode = RISCV::PseudoVSETVLIX0;
503 } else {
504 VLOperand = Node->getOperand(1);
505
506 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
507 uint64_t AVL = C->getZExtValue();
508 if (isUInt<5>(AVL)) {
509 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
510 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
511 XLenVT, VLImm, VTypeIOp));
512 return;
513 }
514 }
515 }
516
518 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
519}
520
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
538 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
547
548 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
549 Node->getOperand(1), VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
554 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
555 Node->getOperand(1), Log2SEW, TWiden));
556 }
557}
558
560 MVT VT = Node->getSimpleValueType(0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
568 SDValue N0 = Node->getOperand(0);
569 SDValue N1 = Node->getOperand(1);
570
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 BinOpc, DL, VT, Shift.getOperand(0),
629 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
632 CurDAG->getTargetConstant(ShAmt, DL, VT));
633 ReplaceNode(Node, SLLI);
634 return true;
635}
636
638 unsigned Opc;
639
640 if (Subtarget->hasVendorXTHeadBb())
641 Opc = RISCV::TH_EXT;
642 else if (Subtarget->hasVendorXAndesPerf())
643 Opc = RISCV::NDS_BFOS;
644 else if (Subtarget->hasVendorXqcibm())
645 Opc = RISCV::QC_EXT;
646 else
647 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
648 return false;
649
650 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
651 if (!N1C)
652 return false;
653
654 SDValue N0 = Node->getOperand(0);
655 if (!N0.hasOneUse())
656 return false;
657
658 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
659 const SDLoc &DL, MVT VT) {
660 if (Opc == RISCV::QC_EXT) {
661 // QC.EXT X, width, shamt
662 // shamt is the same as Lsb
663 // width is the number of bits to extract from the Lsb
664 Msb = Msb - Lsb + 1;
665 }
666 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
667 CurDAG->getTargetConstant(Msb, DL, VT),
668 CurDAG->getTargetConstant(Lsb, DL, VT));
669 };
670
671 SDLoc DL(Node);
672 MVT VT = Node->getSimpleValueType(0);
673 const unsigned RightShAmt = N1C->getZExtValue();
674
675 // Transform (sra (shl X, C1) C2) with C1 < C2
676 // -> (SignedBitfieldExtract X, msb, lsb)
677 if (N0.getOpcode() == ISD::SHL) {
678 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
679 if (!N01C)
680 return false;
681
682 const unsigned LeftShAmt = N01C->getZExtValue();
683 // Make sure that this is a bitfield extraction (i.e., the shift-right
684 // amount can not be less than the left-shift).
685 if (LeftShAmt > RightShAmt)
686 return false;
687
688 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
689 const unsigned Msb = MsbPlusOne - 1;
690 const unsigned Lsb = RightShAmt - LeftShAmt;
691
692 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
693 ReplaceNode(Node, Sbe);
694 return true;
695 }
696
697 // Transform (sra (sext_inreg X, _), C) ->
698 // (SignedBitfieldExtract X, msb, lsb)
699 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
700 unsigned ExtSize =
701 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
702
703 // ExtSize of 32 should use sraiw via tablegen pattern.
704 if (ExtSize == 32)
705 return false;
706
707 const unsigned Msb = ExtSize - 1;
708 // If the shift-right amount is greater than Msb, it means that extracts
709 // the X[Msb] bit and sign-extend it.
710 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
711
712 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
713 ReplaceNode(Node, Sbe);
714 return true;
715 }
716
717 return false;
718}
719
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
740 CurDAG->getTargetConstant(Lsb, DL, VT),
741 CurDAG->getTargetConstant(Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(Node, Sbi);
767 return true;
768 }
769
770 return false;
771}
772
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
795 CurDAG->getTargetConstant(Msb, DL, VT),
796 CurDAG->getTargetConstant(Lsb, DL, VT));
797 ReplaceNode(Node, Ube);
798 return true;
799}
800
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
815 CurDAG->getTargetConstant(Lsb, DL, VT),
816 CurDAG->getTargetConstant(Msb, DL, VT));
817 ReplaceNode(Node, Ubi);
818 return true;
819}
820
822 // Target does not support indexed loads.
823 if (!Subtarget->hasVendorXTHeadMemIdx())
824 return false;
825
828 if (AM == ISD::UNINDEXED)
829 return false;
830
832 if (!C)
833 return false;
834
835 EVT LoadVT = Ld->getMemoryVT();
836 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
837 "Unexpected addressing mode");
838 bool IsPre = AM == ISD::PRE_INC;
839 bool IsPost = AM == ISD::POST_INC;
840 int64_t Offset = C->getSExtValue();
841
842 // The constants that can be encoded in the THeadMemIdx instructions
843 // are of the form (sign_extend(imm5) << imm2).
844 unsigned Shift;
845 for (Shift = 0; Shift < 4; Shift++)
846 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
847 break;
848
849 // Constant cannot be encoded.
850 if (Shift == 4)
851 return false;
852
853 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
854 unsigned Opcode;
855 if (LoadVT == MVT::i8 && IsPre)
856 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
857 else if (LoadVT == MVT::i8 && IsPost)
858 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
859 else if (LoadVT == MVT::i16 && IsPre)
860 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
861 else if (LoadVT == MVT::i16 && IsPost)
862 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
863 else if (LoadVT == MVT::i32 && IsPre)
864 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
865 else if (LoadVT == MVT::i32 && IsPost)
866 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
867 else if (LoadVT == MVT::i64 && IsPre)
868 Opcode = RISCV::TH_LDIB;
869 else if (LoadVT == MVT::i64 && IsPost)
870 Opcode = RISCV::TH_LDIA;
871 else
872 return false;
873
874 EVT Ty = Ld->getOffset().getValueType();
875 SDValue Ops[] = {
876 Ld->getBasePtr(),
877 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
878 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
879 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
880 Ld->getValueType(1), MVT::Other, Ops);
881
882 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
883 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
884
885 ReplaceNode(Node, New);
886
887 return true;
888}
889
890static Register getTileReg(uint64_t TileNum) {
891 assert(TileNum <= 15 && "Invalid tile number");
892 return RISCV::T0 + TileNum;
893}
894
896 if (!Subtarget->hasVInstructions())
897 return;
898
899 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
900
901 SDLoc DL(Node);
902 unsigned IntNo = Node->getConstantOperandVal(1);
903
904 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
905 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
906 "Unexpected vsetvli intrinsic");
907
908 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
909 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
910 SDValue SEWOp =
911 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
912 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
913 Node->getOperand(4), Node->getOperand(5),
914 Node->getOperand(8), SEWOp,
915 Node->getOperand(0)};
916
917 unsigned Opcode;
918 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
919 switch (LMulSDNode->getSExtValue()) {
920 case 5:
921 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
922 : RISCV::PseudoSF_VC_I_SE_MF8;
923 break;
924 case 6:
925 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
926 : RISCV::PseudoSF_VC_I_SE_MF4;
927 break;
928 case 7:
929 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
930 : RISCV::PseudoSF_VC_I_SE_MF2;
931 break;
932 case 0:
933 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
934 : RISCV::PseudoSF_VC_I_SE_M1;
935 break;
936 case 1:
937 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
938 : RISCV::PseudoSF_VC_I_SE_M2;
939 break;
940 case 2:
941 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
942 : RISCV::PseudoSF_VC_I_SE_M4;
943 break;
944 case 3:
945 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
946 : RISCV::PseudoSF_VC_I_SE_M8;
947 break;
948 }
949
950 ReplaceNode(Node, CurDAG->getMachineNode(
951 Opcode, DL, Node->getSimpleValueType(0), Operands));
952}
953
954static unsigned getSegInstNF(unsigned Intrinsic) {
955#define INST_NF_CASE(NAME, NF) \
956 case Intrinsic::riscv_##NAME##NF: \
957 return NF;
958#define INST_NF_CASE_MASK(NAME, NF) \
959 case Intrinsic::riscv_##NAME##NF##_mask: \
960 return NF;
961#define INST_NF_CASE_FF(NAME, NF) \
962 case Intrinsic::riscv_##NAME##NF##ff: \
963 return NF;
964#define INST_NF_CASE_FF_MASK(NAME, NF) \
965 case Intrinsic::riscv_##NAME##NF##ff_mask: \
966 return NF;
967#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
968 MACRO_NAME(NAME, 2) \
969 MACRO_NAME(NAME, 3) \
970 MACRO_NAME(NAME, 4) \
971 MACRO_NAME(NAME, 5) \
972 MACRO_NAME(NAME, 6) \
973 MACRO_NAME(NAME, 7) \
974 MACRO_NAME(NAME, 8)
975#define INST_ALL_NF_CASE(NAME) \
976 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
977 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
978#define INST_ALL_NF_CASE_WITH_FF(NAME) \
979 INST_ALL_NF_CASE(NAME) \
980 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
981 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
982 switch (Intrinsic) {
983 default:
984 llvm_unreachable("Unexpected segment load/store intrinsic");
986 INST_ALL_NF_CASE(vlsseg)
987 INST_ALL_NF_CASE(vloxseg)
988 INST_ALL_NF_CASE(vluxseg)
989 INST_ALL_NF_CASE(vsseg)
990 INST_ALL_NF_CASE(vssseg)
991 INST_ALL_NF_CASE(vsoxseg)
992 INST_ALL_NF_CASE(vsuxseg)
993 }
994}
995
// Return true if the 32-bit immediate Val is materializable by a P-extension
// packed-immediate load: either two identical halfwords each fitting a signed
// 10-bit immediate (PLI_H), or four identical bytes (PLI_B).
static bool isApplicableToPLI(int Val) {
  const int16_t LoHalf = static_cast<int16_t>(Val);
  const int16_t HiHalf = static_cast<int16_t>(Val >> 16);

  // Both halfwords must match for either packed form to apply.
  if (HiHalf != LoHalf)
    return false;

  // PLI_H: the repeated halfword fits in a signed 10-bit immediate.
  if (LoHalf >= -512 && LoHalf <= 511)
    return true;

  // PLI_B: the two bytes within the halfword must also match.
  const int8_t HiByte = static_cast<int8_t>(LoHalf >> 8);
  const int8_t LoByte = static_cast<int8_t>(Val);
  return HiByte == LoByte;
}
1007
1009 // If we have a custom node, we have already selected.
1010 if (Node->isMachineOpcode()) {
1011 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1012 Node->setNodeId(-1);
1013 return;
1014 }
1015
1016 // Instruction Selection not handled by the auto-generated tablegen selection
1017 // should be handled here.
1018 unsigned Opcode = Node->getOpcode();
1019 MVT XLenVT = Subtarget->getXLenVT();
1020 SDLoc DL(Node);
1021 MVT VT = Node->getSimpleValueType(0);
1022
1023 bool HasBitTest = Subtarget->hasBEXTILike();
1024
1025 switch (Opcode) {
1026 case ISD::Constant: {
1027 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
1028 auto *ConstNode = cast<ConstantSDNode>(Node);
1029 if (ConstNode->isZero()) {
1030 SDValue New =
1031 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1032 ReplaceNode(Node, New.getNode());
1033 return;
1034 }
1035 int64_t Imm = ConstNode->getSExtValue();
1036 // If only the lower 8 bits are used, try to convert this to a simm6 by
1037 // sign-extending bit 7. This is neutral without the C extension, and
1038 // allows C.LI to be used if C is present.
1039 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
1040 Imm = SignExtend64<8>(Imm);
1041 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1042 // by sign extending bit 15.
1043 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
1045 Imm = SignExtend64<16>(Imm);
1046 // If the upper 32-bits are not used try to convert this into a simm32 by
1047 // sign extending bit 32.
1048 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1049 Imm = SignExtend64<32>(Imm);
1050
1051 if (Subtarget->enablePExtCodeGen() && isApplicableToPLI(Imm) &&
1052 hasAllWUsers(Node)) {
1053 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1054 // can simply copy lower 32 bits to higher 32 bits to make it able to
1055 // rematerialize to PLI_B or PLI_H
1056 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1057 }
1058
1059 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1060 return;
1061 }
1062 case ISD::ConstantFP: {
1063 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1064
1065 bool Is64Bit = Subtarget->is64Bit();
1066 bool HasZdinx = Subtarget->hasStdExtZdinx();
1067
1068 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1069 SDValue Imm;
1070 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1071 // create an integer immediate.
1072 if (APF.isPosZero() || NegZeroF64) {
1073 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1074 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1075 else
1076 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1077 } else {
1078 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1079 *Subtarget);
1080 }
1081
1082 unsigned Opc;
1083 switch (VT.SimpleTy) {
1084 default:
1085 llvm_unreachable("Unexpected size");
1086 case MVT::bf16:
1087 assert(Subtarget->hasStdExtZfbfmin());
1088 Opc = RISCV::FMV_H_X;
1089 break;
1090 case MVT::f16:
1091 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1092 break;
1093 case MVT::f32:
1094 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1095 break;
1096 case MVT::f64:
1097 // For RV32, we can't move from a GPR, we need to convert instead. This
1098 // should only happen for +0.0 and -0.0.
1099 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1100 if (HasZdinx)
1101 Opc = RISCV::COPY;
1102 else
1103 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1104 break;
1105 }
1106
1107 SDNode *Res;
1108 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1109 Res =
1110 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1111 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1112 Res =
1113 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1114 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1115 Res = CurDAG->getMachineNode(
1116 Opc, DL, VT, Imm,
1117 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1118 else
1119 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1120
1121 // For f64 -0.0, we need to insert a fneg.d idiom.
1122 if (NegZeroF64) {
1123 Opc = RISCV::FSGNJN_D;
1124 if (HasZdinx)
1125 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1126 Res =
1127 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1128 }
1129
1130 ReplaceNode(Node, Res);
1131 return;
1132 }
1133 case RISCVISD::BuildGPRPair:
1134 case RISCVISD::BuildPairF64: {
1135 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1136 break;
1137
1138 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1139 "BuildPairF64 only handled here on rv32i_zdinx");
1140
1141 SDValue Ops[] = {
1142 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1143 Node->getOperand(0),
1144 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1145 Node->getOperand(1),
1146 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1147
1148 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
1149 ReplaceNode(Node, N);
1150 return;
1151 }
1152 case RISCVISD::SplitGPRPair:
1153 case RISCVISD::SplitF64: {
1154 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1155 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1156 "SplitF64 only handled here on rv32i_zdinx");
1157
1158 if (!SDValue(Node, 0).use_empty()) {
1159 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1160 Node->getValueType(0),
1161 Node->getOperand(0));
1162 ReplaceUses(SDValue(Node, 0), Lo);
1163 }
1164
1165 if (!SDValue(Node, 1).use_empty()) {
1166 SDValue Hi = CurDAG->getTargetExtractSubreg(
1167 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1168 ReplaceUses(SDValue(Node, 1), Hi);
1169 }
1170
1171 CurDAG->RemoveDeadNode(Node);
1172 return;
1173 }
1174
1175 assert(Opcode != RISCVISD::SplitGPRPair &&
1176 "SplitGPRPair should already be handled");
1177
1178 if (!Subtarget->hasStdExtZfa())
1179 break;
1180 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1181 "Unexpected subtarget");
1182
1183 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1184 if (!SDValue(Node, 0).use_empty()) {
1185 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1186 Node->getOperand(0));
1187 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1188 }
1189 if (!SDValue(Node, 1).use_empty()) {
1190 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1191 Node->getOperand(0));
1192 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1193 }
1194
1195 CurDAG->RemoveDeadNode(Node);
1196 return;
1197 }
1198 case ISD::SHL: {
1199 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1200 if (!N1C)
1201 break;
1202 SDValue N0 = Node->getOperand(0);
1203 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1205 break;
1206 unsigned ShAmt = N1C->getZExtValue();
1207 uint64_t Mask = N0.getConstantOperandVal(1);
1208
1209 if (isShiftedMask_64(Mask)) {
1210 unsigned XLen = Subtarget->getXLen();
1211 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1212 unsigned TrailingZeros = llvm::countr_zero(Mask);
1213 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1214 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1215 // where C2 has 32 leading zeros and C3 trailing zeros.
1216 SDNode *SRLIW = CurDAG->getMachineNode(
1217 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1218 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1219 SDNode *SLLI = CurDAG->getMachineNode(
1220 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1221 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1222 ReplaceNode(Node, SLLI);
1223 return;
1224 }
1225 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1226 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1227 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1228 // where C2 has C4 leading zeros and no trailing zeros.
1229 // This is profitable if the "and" was to be lowered to
1230 // (srli (slli X, C4), C4) and not (andi X, C2).
1231 // For "LeadingZeros == 32":
1232 // - with Zba it's just (slli.uw X, C)
1233 // - without Zba a tablegen pattern applies the very same
1234 // transform as we would have done here
1235 SDNode *SLLI = CurDAG->getMachineNode(
1236 RISCV::SLLI, DL, VT, N0.getOperand(0),
1237 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1238 SDNode *SRLI = CurDAG->getMachineNode(
1239 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1240 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1241 ReplaceNode(Node, SRLI);
1242 return;
1243 }
1244 }
1245 break;
1246 }
1247 case ISD::SRL: {
1248 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1249 if (!N1C)
1250 break;
1251 SDValue N0 = Node->getOperand(0);
1252 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1253 break;
1254 unsigned ShAmt = N1C->getZExtValue();
1255 uint64_t Mask = N0.getConstantOperandVal(1);
1256
1257 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1258 // 32 leading zeros and C3 trailing zeros.
1259 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1260 unsigned XLen = Subtarget->getXLen();
1261 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1262 unsigned TrailingZeros = llvm::countr_zero(Mask);
1263 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1264 SDNode *SRLIW = CurDAG->getMachineNode(
1265 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1266 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1267 SDNode *SLLI = CurDAG->getMachineNode(
1268 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1269 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1270 ReplaceNode(Node, SLLI);
1271 return;
1272 }
1273 }
1274
1275 // Optimize (srl (and X, C2), C) ->
1276 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1277 // Where C2 is a mask with C3 trailing ones.
1278 // Taking into account that the C2 may have had lower bits unset by
1279 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1280 // This pattern occurs when type legalizing right shifts for types with
1281 // less than XLen bits.
1282 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1283 if (!isMask_64(Mask))
1284 break;
1285 unsigned TrailingOnes = llvm::countr_one(Mask);
1286 if (ShAmt >= TrailingOnes)
1287 break;
1288 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1289 if (TrailingOnes == 32) {
1290 SDNode *SRLI = CurDAG->getMachineNode(
1291 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1292 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1293 ReplaceNode(Node, SRLI);
1294 return;
1295 }
1296
1297 // Only do the remaining transforms if the AND has one use.
1298 if (!N0.hasOneUse())
1299 break;
1300
1301 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1302 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1303 SDNode *BEXTI = CurDAG->getMachineNode(
1304 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1305 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1306 ReplaceNode(Node, BEXTI);
1307 return;
1308 }
1309
1310 const unsigned Msb = TrailingOnes - 1;
1311 const unsigned Lsb = ShAmt;
1312 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1313 return;
1314
1315 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1316 SDNode *SLLI =
1317 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1318 CurDAG->getTargetConstant(LShAmt, DL, VT));
1319 SDNode *SRLI = CurDAG->getMachineNode(
1320 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1321 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1322 ReplaceNode(Node, SRLI);
1323 return;
1324 }
1325 case ISD::SRA: {
1327 return;
1328
1330 return;
1331
1332 // Optimize (sra (sext_inreg X, i16), C) ->
1333 // (srai (slli X, (XLen-16), (XLen-16) + C)
1334 // And (sra (sext_inreg X, i8), C) ->
1335 // (srai (slli X, (XLen-8), (XLen-8) + C)
1336 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1337 // This transform matches the code we get without Zbb. The shifts are more
1338 // compressible, and this can help expose CSE opportunities in the sdiv by
1339 // constant optimization.
1340 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1341 if (!N1C)
1342 break;
1343 SDValue N0 = Node->getOperand(0);
1344 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1345 break;
1346 unsigned ShAmt = N1C->getZExtValue();
1347 unsigned ExtSize =
1348 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1349 // ExtSize of 32 should use sraiw via tablegen pattern.
1350 if (ExtSize >= 32 || ShAmt >= ExtSize)
1351 break;
1352 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1353 SDNode *SLLI =
1354 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1355 CurDAG->getTargetConstant(LShAmt, DL, VT));
1356 SDNode *SRAI = CurDAG->getMachineNode(
1357 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1358 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1359 ReplaceNode(Node, SRAI);
1360 return;
1361 }
1362 case ISD::OR: {
1364 return;
1365
1366 break;
1367 }
1368 case ISD::XOR:
1370 return;
1371
1372 break;
1373 case ISD::AND: {
1374 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1375 if (!N1C)
1376 break;
1377
1378 SDValue N0 = Node->getOperand(0);
1379
1380 bool LeftShift = N0.getOpcode() == ISD::SHL;
1381 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1382 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1383 if (!C)
1384 break;
1385 unsigned C2 = C->getZExtValue();
1386 unsigned XLen = Subtarget->getXLen();
1387 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1388
1389 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1390 // shift pair might offer more compression opportunities.
1391 // TODO: We could check for C extension here, but we don't have many lit
1392 // tests with the C extension enabled so not checking gets better
1393 // coverage.
1394 // TODO: What if ANDI faster than shift?
1395 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1396
1397 uint64_t C1 = N1C->getZExtValue();
1398
1399 // Clear irrelevant bits in the mask.
1400 if (LeftShift)
1402 else
1403 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1404
1405 // Some transforms should only be done if the shift has a single use or
1406 // the AND would become (srli (slli X, 32), 32)
1407 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1408
1409 SDValue X = N0.getOperand(0);
1410
1411 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1412 // with c3 leading zeros.
1413 if (!LeftShift && isMask_64(C1)) {
1414 unsigned Leading = XLen - llvm::bit_width(C1);
1415 if (C2 < Leading) {
1416 // If the number of leading zeros is C2+32 this can be SRLIW.
1417 if (C2 + 32 == Leading) {
1418 SDNode *SRLIW = CurDAG->getMachineNode(
1419 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1420 ReplaceNode(Node, SRLIW);
1421 return;
1422 }
1423
1424 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1425 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1426 //
1427 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1428 // legalized and goes through DAG combine.
1429 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1430 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1431 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1432 SDNode *SRAIW =
1433 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1434 CurDAG->getTargetConstant(31, DL, VT));
1435 SDNode *SRLIW = CurDAG->getMachineNode(
1436 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1437 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1438 ReplaceNode(Node, SRLIW);
1439 return;
1440 }
1441
1442 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1443 // available.
1444 // Transform (and (srl x, C2), C1)
1445 // -> (<bfextract> x, msb, lsb)
1446 //
1447 // Make sure to keep this below the SRLIW cases, as we always want to
1448 // prefer the more common instruction.
1449 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1450 const unsigned Lsb = C2;
1451 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1452 return;
1453
1454 // (srli (slli x, c3-c2), c3).
1455 // Skip if we could use (zext.w (sraiw X, C2)).
1456 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1457 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1458 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1459 // Also Skip if we can use bexti or th.tst.
1460 Skip |= HasBitTest && Leading == XLen - 1;
1461 if (OneUseOrZExtW && !Skip) {
1462 SDNode *SLLI = CurDAG->getMachineNode(
1463 RISCV::SLLI, DL, VT, X,
1464 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1465 SDNode *SRLI = CurDAG->getMachineNode(
1466 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1467 CurDAG->getTargetConstant(Leading, DL, VT));
1468 ReplaceNode(Node, SRLI);
1469 return;
1470 }
1471 }
1472 }
1473
1474 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1475 // shifted by c2 bits with c3 leading zeros.
1476 if (LeftShift && isShiftedMask_64(C1)) {
1477 unsigned Leading = XLen - llvm::bit_width(C1);
1478
1479 if (C2 + Leading < XLen &&
1480 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1481 // Use slli.uw when possible.
1482 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1483 SDNode *SLLI_UW =
1484 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1485 CurDAG->getTargetConstant(C2, DL, VT));
1486 ReplaceNode(Node, SLLI_UW);
1487 return;
1488 }
1489
1490 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1491 // available.
1492 // Transform (and (shl x, c2), c1)
1493 // -> (<bfinsert> x, msb, lsb)
1494 // e.g.
1495 // (and (shl x, 12), 0x00fff000)
1496 // If XLen = 32 and C2 = 12, then
1497 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1498 const unsigned Msb = XLen - Leading - 1;
1499 const unsigned Lsb = C2;
1500 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1501 return;
1502
1503 // (srli (slli c2+c3), c3)
1504 if (OneUseOrZExtW && !IsCANDI) {
1505 SDNode *SLLI = CurDAG->getMachineNode(
1506 RISCV::SLLI, DL, VT, X,
1507 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1508 SDNode *SRLI = CurDAG->getMachineNode(
1509 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1510 CurDAG->getTargetConstant(Leading, DL, VT));
1511 ReplaceNode(Node, SRLI);
1512 return;
1513 }
1514 }
1515 }
1516
1517 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1518 // shifted mask with c2 leading zeros and c3 trailing zeros.
1519 if (!LeftShift && isShiftedMask_64(C1)) {
1520 unsigned Leading = XLen - llvm::bit_width(C1);
1521 unsigned Trailing = llvm::countr_zero(C1);
1522 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1523 !IsCANDI) {
1524 unsigned SrliOpc = RISCV::SRLI;
1525 // If the input is zexti32 we should use SRLIW.
1526 if (X.getOpcode() == ISD::AND &&
1527 isa<ConstantSDNode>(X.getOperand(1)) &&
1528 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1529 SrliOpc = RISCV::SRLIW;
1530 X = X.getOperand(0);
1531 }
1532 SDNode *SRLI = CurDAG->getMachineNode(
1533 SrliOpc, DL, VT, X,
1534 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1535 SDNode *SLLI = CurDAG->getMachineNode(
1536 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1537 CurDAG->getTargetConstant(Trailing, DL, VT));
1538 ReplaceNode(Node, SLLI);
1539 return;
1540 }
1541 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1542 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1543 OneUseOrZExtW && !IsCANDI) {
1544 SDNode *SRLIW = CurDAG->getMachineNode(
1545 RISCV::SRLIW, DL, VT, X,
1546 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1547 SDNode *SLLI = CurDAG->getMachineNode(
1548 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1549 CurDAG->getTargetConstant(Trailing, DL, VT));
1550 ReplaceNode(Node, SLLI);
1551 return;
1552 }
1553 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1554 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1555 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1556 SDNode *SRLI = CurDAG->getMachineNode(
1557 RISCV::SRLI, DL, VT, X,
1558 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1559 SDNode *SLLI_UW = CurDAG->getMachineNode(
1560 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1561 CurDAG->getTargetConstant(Trailing, DL, VT));
1562 ReplaceNode(Node, SLLI_UW);
1563 return;
1564 }
1565 }
1566
1567 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1568 // shifted mask with no leading zeros and c3 trailing zeros.
1569 if (LeftShift && isShiftedMask_64(C1)) {
1570 unsigned Leading = XLen - llvm::bit_width(C1);
1571 unsigned Trailing = llvm::countr_zero(C1);
1572 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1573 SDNode *SRLI = CurDAG->getMachineNode(
1574 RISCV::SRLI, DL, VT, X,
1575 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1576 SDNode *SLLI = CurDAG->getMachineNode(
1577 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1578 CurDAG->getTargetConstant(Trailing, DL, VT));
1579 ReplaceNode(Node, SLLI);
1580 return;
1581 }
1582 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1583 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1584 SDNode *SRLIW = CurDAG->getMachineNode(
1585 RISCV::SRLIW, DL, VT, X,
1586 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1587 SDNode *SLLI = CurDAG->getMachineNode(
1588 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1589 CurDAG->getTargetConstant(Trailing, DL, VT));
1590 ReplaceNode(Node, SLLI);
1591 return;
1592 }
1593
1594 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1595 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1596 Subtarget->hasStdExtZba()) {
1597 SDNode *SRLI = CurDAG->getMachineNode(
1598 RISCV::SRLI, DL, VT, X,
1599 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1600 SDNode *SLLI_UW = CurDAG->getMachineNode(
1601 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1602 CurDAG->getTargetConstant(Trailing, DL, VT));
1603 ReplaceNode(Node, SLLI_UW);
1604 return;
1605 }
1606 }
1607 }
1608
1609 const uint64_t C1 = N1C->getZExtValue();
1610
1611 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1612 N0.hasOneUse()) {
1613 unsigned C2 = N0.getConstantOperandVal(1);
1614 unsigned XLen = Subtarget->getXLen();
1615 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1616
1617 SDValue X = N0.getOperand(0);
1618
1619 // Prefer SRAIW + ANDI when possible.
1620 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1621 X.getOpcode() == ISD::SHL &&
1622 isa<ConstantSDNode>(X.getOperand(1)) &&
1623 X.getConstantOperandVal(1) == 32;
1624 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1625 // mask with c3 leading zeros and c2 is larger than c3.
1626 if (isMask_64(C1) && !Skip) {
1627 unsigned Leading = XLen - llvm::bit_width(C1);
1628 if (C2 > Leading) {
1629 SDNode *SRAI = CurDAG->getMachineNode(
1630 RISCV::SRAI, DL, VT, X,
1631 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1632 SDNode *SRLI = CurDAG->getMachineNode(
1633 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1634 CurDAG->getTargetConstant(Leading, DL, VT));
1635 ReplaceNode(Node, SRLI);
1636 return;
1637 }
1638 }
1639
1640 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1641 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1642 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1643 if (isShiftedMask_64(C1) && !Skip) {
1644 unsigned Leading = XLen - llvm::bit_width(C1);
1645 unsigned Trailing = llvm::countr_zero(C1);
1646 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1647 SDNode *SRAI = CurDAG->getMachineNode(
1648 RISCV::SRAI, DL, VT, N0.getOperand(0),
1649 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1650 SDNode *SRLI = CurDAG->getMachineNode(
1651 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1652 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1653 SDNode *SLLI = CurDAG->getMachineNode(
1654 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1655 CurDAG->getTargetConstant(Trailing, DL, VT));
1656 ReplaceNode(Node, SLLI);
1657 return;
1658 }
1659 }
1660 }
1661
1662 // If C1 masks off the upper bits only (but can't be formed as an
1663 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1664 // available.
1665 // Transform (and x, C1)
1666 // -> (<bfextract> x, msb, lsb)
1667 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1668 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1669 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1670 const unsigned Msb = llvm::bit_width(C1) - 1;
1671 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1672 return;
1673 }
1674
1676 return;
1677
1678 break;
1679 }
1680 case ISD::MUL: {
1681 // Special case for calculating (mul (and X, C2), C1) where the full product
1682 // fits in XLen bits. We can shift X left by the number of leading zeros in
1683 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1684 // product has XLen trailing zeros, putting it in the output of MULHU. This
1685 // can avoid materializing a constant in a register for C2.
1686
1687 // RHS should be a constant.
1688 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1689 if (!N1C || !N1C->hasOneUse())
1690 break;
1691
1692 // LHS should be an AND with constant.
1693 SDValue N0 = Node->getOperand(0);
1694 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1695 break;
1696
1698
1699 // Constant should be a mask.
1700 if (!isMask_64(C2))
1701 break;
1702
1703 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1704 // multiple users or the constant is a simm12. This prevents inserting a
1705 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1706 // make it more costly to materialize. Otherwise, using a SLLI might allow
1707 // it to be compressed.
1708 bool IsANDIOrZExt =
1709 isInt<12>(C2) ||
1710 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1711 // With XTHeadBb, we can use TH.EXTU.
1712 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1713 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1714 break;
1715 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1716 // the constant is a simm32.
1717 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1718 // With XTHeadBb, we can use TH.EXTU.
1719 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1720 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1721 break;
1722
1723 // We need to shift left the AND input and C1 by a total of XLen bits.
1724
1725 // How far left do we need to shift the AND input?
1726 unsigned XLen = Subtarget->getXLen();
1727 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1728
1729 // The constant gets shifted by the remaining amount unless that would
1730 // shift bits out.
1731 uint64_t C1 = N1C->getZExtValue();
1732 unsigned ConstantShift = XLen - LeadingZeros;
1733 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1734 break;
1735
1736 uint64_t ShiftedC1 = C1 << ConstantShift;
1737 // If this RV32, we need to sign extend the constant.
1738 if (XLen == 32)
1739 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1740
1741 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1742 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1743 SDNode *SLLI =
1744 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1745 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1746 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1747 SDValue(SLLI, 0), SDValue(Imm, 0));
1748 ReplaceNode(Node, MULHU);
1749 return;
1750 }
1751 case ISD::LOAD: {
1752 if (tryIndexedLoad(Node))
1753 return;
1754
1755 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1756 // We match post-incrementing load here
1758 if (Load->getAddressingMode() != ISD::POST_INC)
1759 break;
1760
1761 SDValue Chain = Node->getOperand(0);
1762 SDValue Base = Node->getOperand(1);
1763 SDValue Offset = Node->getOperand(2);
1764
1765 bool Simm12 = false;
1766 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1767
1768 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1769 int ConstantVal = ConstantOffset->getSExtValue();
1770 Simm12 = isInt<12>(ConstantVal);
1771 if (Simm12)
1772 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1773 Offset.getValueType());
1774 }
1775
1776 unsigned Opcode = 0;
1777 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1778 case MVT::i8:
1779 if (Simm12 && SignExtend)
1780 Opcode = RISCV::CV_LB_ri_inc;
1781 else if (Simm12 && !SignExtend)
1782 Opcode = RISCV::CV_LBU_ri_inc;
1783 else if (!Simm12 && SignExtend)
1784 Opcode = RISCV::CV_LB_rr_inc;
1785 else
1786 Opcode = RISCV::CV_LBU_rr_inc;
1787 break;
1788 case MVT::i16:
1789 if (Simm12 && SignExtend)
1790 Opcode = RISCV::CV_LH_ri_inc;
1791 else if (Simm12 && !SignExtend)
1792 Opcode = RISCV::CV_LHU_ri_inc;
1793 else if (!Simm12 && SignExtend)
1794 Opcode = RISCV::CV_LH_rr_inc;
1795 else
1796 Opcode = RISCV::CV_LHU_rr_inc;
1797 break;
1798 case MVT::i32:
1799 if (Simm12)
1800 Opcode = RISCV::CV_LW_ri_inc;
1801 else
1802 Opcode = RISCV::CV_LW_rr_inc;
1803 break;
1804 default:
1805 break;
1806 }
1807 if (!Opcode)
1808 break;
1809
1810 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1811 Chain.getSimpleValueType(), Base,
1812 Offset, Chain));
1813 return;
1814 }
1815 break;
1816 }
1817 case RISCVISD::LD_RV32: {
1818 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1819
1821 SDValue Chain = Node->getOperand(0);
1822 SDValue Addr = Node->getOperand(1);
1824
1825 SDValue Ops[] = {Base, Offset, Chain};
1826 MachineSDNode *New = CurDAG->getMachineNode(
1827 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1828 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1829 MVT::i32, SDValue(New, 0));
1830 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1831 MVT::i32, SDValue(New, 0));
1832 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1833 ReplaceUses(SDValue(Node, 0), Lo);
1834 ReplaceUses(SDValue(Node, 1), Hi);
1835 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1836 CurDAG->RemoveDeadNode(Node);
1837 return;
1838 }
1839 case RISCVISD::SD_RV32: {
1841 SDValue Chain = Node->getOperand(0);
1842 SDValue Addr = Node->getOperand(3);
1844
1845 SDValue Lo = Node->getOperand(1);
1846 SDValue Hi = Node->getOperand(2);
1847
1848 SDValue RegPair;
1849 // Peephole to use X0_Pair for storing zero.
1851 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1852 } else {
1853 SDValue Ops[] = {
1854 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1855 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1856 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1857
1858 RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1859 MVT::Untyped, Ops),
1860 0);
1861 }
1862
1863 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1864 {RegPair, Base, Offset, Chain});
1865 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1866 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
1867 CurDAG->RemoveDeadNode(Node);
1868 return;
1869 }
1870 case RISCVISD::PPACK_DH: {
1871 assert(Subtarget->enablePExtCodeGen() && Subtarget->isRV32());
1872
1873 SDValue Val0 = Node->getOperand(0);
1874 SDValue Val1 = Node->getOperand(1);
1875 SDValue Val2 = Node->getOperand(2);
1876 SDValue Val3 = Node->getOperand(3);
1877
1878 SDValue Ops[] = {
1879 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Val0,
1880 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Val2,
1881 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1882 SDValue RegPair0 =
1883 SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1884 MVT::Untyped, Ops),
1885 0);
1886 SDValue Ops1[] = {
1887 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Val1,
1888 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Val3,
1889 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1890 SDValue RegPair1 =
1891 SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1892 MVT::Untyped, Ops1),
1893 0);
1894
1895 MachineSDNode *PackDH = CurDAG->getMachineNode(
1896 RISCV::PPACK_DH, DL, MVT::Untyped, {RegPair0, RegPair1});
1897
1898 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1899 MVT::i32, SDValue(PackDH, 0));
1900 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1901 MVT::i32, SDValue(PackDH, 0));
1902 ReplaceUses(SDValue(Node, 0), Lo);
1903 ReplaceUses(SDValue(Node, 1), Hi);
1904 CurDAG->RemoveDeadNode(Node);
1905 return;
1906 }
1908 unsigned IntNo = Node->getConstantOperandVal(0);
1909 switch (IntNo) {
1910 // By default we do not custom select any intrinsic.
1911 default:
1912 break;
1913 case Intrinsic::riscv_vmsgeu:
1914 case Intrinsic::riscv_vmsge: {
1915 SDValue Src1 = Node->getOperand(1);
1916 SDValue Src2 = Node->getOperand(2);
1917 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1918 bool IsCmpConstant = false;
1919 bool IsCmpMinimum = false;
1920 // Only custom select scalar second operand.
1921 if (Src2.getValueType() != XLenVT)
1922 break;
1923 // Small constants are handled with patterns.
1924 int64_t CVal = 0;
1925 MVT Src1VT = Src1.getSimpleValueType();
1926 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1927 IsCmpConstant = true;
1928 CVal = C->getSExtValue();
1929 if (CVal >= -15 && CVal <= 16) {
1930 if (!IsUnsigned || CVal != 0)
1931 break;
1932 IsCmpMinimum = true;
1933 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1934 Src1VT.getScalarSizeInBits())
1935 .getSExtValue()) {
1936 IsCmpMinimum = true;
1937 }
1938 }
1939 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1940 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1941 default:
1942 llvm_unreachable("Unexpected LMUL!");
1943#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1944 case RISCVVType::lmulenum: \
1945 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1946 : RISCV::PseudoVMSLT_VX_##suffix; \
1947 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1948 : RISCV::PseudoVMSGT_VX_##suffix; \
1949 break;
1950 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1951 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1952 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1953 CASE_VMSLT_OPCODES(LMUL_1, M1)
1954 CASE_VMSLT_OPCODES(LMUL_2, M2)
1955 CASE_VMSLT_OPCODES(LMUL_4, M4)
1956 CASE_VMSLT_OPCODES(LMUL_8, M8)
1957#undef CASE_VMSLT_OPCODES
1958 }
1959 // Mask operations use the LMUL from the mask type.
1960 switch (RISCVTargetLowering::getLMUL(VT)) {
1961 default:
1962 llvm_unreachable("Unexpected LMUL!");
1963#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1964 case RISCVVType::lmulenum: \
1965 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1966 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1967 break;
1968 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1969 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1970 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1971 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1972 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1973 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1974 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1975#undef CASE_VMNAND_VMSET_OPCODES
1976 }
1977 SDValue SEW = CurDAG->getTargetConstant(
1978 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1979 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1980 SDValue VL;
1981 selectVLOp(Node->getOperand(3), VL);
1982
1983 // If vmsge(u) with minimum value, expand it to vmset.
1984 if (IsCmpMinimum) {
1986 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1987 return;
1988 }
1989
1990 if (IsCmpConstant) {
1991 SDValue Imm =
1992 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1993
1994 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1995 {Src1, Imm, VL, SEW}));
1996 return;
1997 }
1998
1999 // Expand to
2000 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2001 SDValue Cmp = SDValue(
2002 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2003 0);
2004 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2005 {Cmp, Cmp, VL, MaskSEW}));
2006 return;
2007 }
2008 case Intrinsic::riscv_vmsgeu_mask:
2009 case Intrinsic::riscv_vmsge_mask: {
2010 SDValue Src1 = Node->getOperand(2);
2011 SDValue Src2 = Node->getOperand(3);
2012 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2013 bool IsCmpConstant = false;
2014 bool IsCmpMinimum = false;
2015 // Only custom select scalar second operand.
2016 if (Src2.getValueType() != XLenVT)
2017 break;
2018 // Small constants are handled with patterns.
2019 MVT Src1VT = Src1.getSimpleValueType();
2020 int64_t CVal = 0;
2021 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2022 IsCmpConstant = true;
2023 CVal = C->getSExtValue();
2024 if (CVal >= -15 && CVal <= 16) {
2025 if (!IsUnsigned || CVal != 0)
2026 break;
2027 IsCmpMinimum = true;
2028 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2029 Src1VT.getScalarSizeInBits())
2030 .getSExtValue()) {
2031 IsCmpMinimum = true;
2032 }
2033 }
2034 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2035 VMOROpcode, VMSGTMaskOpcode;
2036 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2037 default:
2038 llvm_unreachable("Unexpected LMUL!");
2039#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2040 case RISCVVType::lmulenum: \
2041 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2042 : RISCV::PseudoVMSLT_VX_##suffix; \
2043 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2044 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2045 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2046 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2047 break;
2048 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2049 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2050 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2051 CASE_VMSLT_OPCODES(LMUL_1, M1)
2052 CASE_VMSLT_OPCODES(LMUL_2, M2)
2053 CASE_VMSLT_OPCODES(LMUL_4, M4)
2054 CASE_VMSLT_OPCODES(LMUL_8, M8)
2055#undef CASE_VMSLT_OPCODES
2056 }
2057 // Mask operations use the LMUL from the mask type.
2058 switch (RISCVTargetLowering::getLMUL(VT)) {
2059 default:
2060 llvm_unreachable("Unexpected LMUL!");
2061#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2062 case RISCVVType::lmulenum: \
2063 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2064 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2065 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2066 break;
2067 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2068 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2069 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2074#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2075 }
2076 SDValue SEW = CurDAG->getTargetConstant(
2077 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2078 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2079 SDValue VL;
2080 selectVLOp(Node->getOperand(5), VL);
2081 SDValue MaskedOff = Node->getOperand(1);
2082 SDValue Mask = Node->getOperand(4);
2083
2084 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2085 if (IsCmpMinimum) {
2086 // We don't need vmor if the MaskedOff and the Mask are the same
2087 // value.
2088 if (Mask == MaskedOff) {
2089 ReplaceUses(Node, Mask.getNode());
2090 return;
2091 }
2093 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2094 {Mask, MaskedOff, VL, MaskSEW}));
2095 return;
2096 }
2097
2098 // If the MaskedOff value and the Mask are the same value use
2099 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2100 // This avoids needing to copy v0 to vd before starting the next sequence.
2101 if (Mask == MaskedOff) {
2102 SDValue Cmp = SDValue(
2103 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2104 0);
2105 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2106 {Mask, Cmp, VL, MaskSEW}));
2107 return;
2108 }
2109
2110 SDValue PolicyOp =
2111 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2112
2113 if (IsCmpConstant) {
2114 SDValue Imm =
2115 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2116
2117 ReplaceNode(Node, CurDAG->getMachineNode(
2118 VMSGTMaskOpcode, DL, VT,
2119 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2120 return;
2121 }
2122
2123 // Otherwise use
2124 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2125 // The result is mask undisturbed.
2126 // We use the same instructions to emulate mask agnostic behavior, because
2127 // the agnostic result can be either undisturbed or all 1.
2128 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2129 {MaskedOff, Src1, Src2, Mask,
2130 VL, SEW, PolicyOp}),
2131 0);
2132 // vmxor.mm vd, vd, v0 is used to update active value.
2133 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2134 {Cmp, Mask, VL, MaskSEW}));
2135 return;
2136 }
2137 case Intrinsic::riscv_vsetvli:
2138 case Intrinsic::riscv_vsetvlimax:
2139 return selectVSETVLI(Node);
2140 case Intrinsic::riscv_sf_vsettnt:
2141 case Intrinsic::riscv_sf_vsettm:
2142 case Intrinsic::riscv_sf_vsettk:
2143 return selectXSfmmVSET(Node);
2144 }
2145 break;
2146 }
2148 unsigned IntNo = Node->getConstantOperandVal(1);
2149 switch (IntNo) {
2150 // By default we do not custom select any intrinsic.
2151 default:
2152 break;
2153 case Intrinsic::riscv_vlseg2:
2154 case Intrinsic::riscv_vlseg3:
2155 case Intrinsic::riscv_vlseg4:
2156 case Intrinsic::riscv_vlseg5:
2157 case Intrinsic::riscv_vlseg6:
2158 case Intrinsic::riscv_vlseg7:
2159 case Intrinsic::riscv_vlseg8: {
2160 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2161 /*IsStrided*/ false);
2162 return;
2163 }
2164 case Intrinsic::riscv_vlseg2_mask:
2165 case Intrinsic::riscv_vlseg3_mask:
2166 case Intrinsic::riscv_vlseg4_mask:
2167 case Intrinsic::riscv_vlseg5_mask:
2168 case Intrinsic::riscv_vlseg6_mask:
2169 case Intrinsic::riscv_vlseg7_mask:
2170 case Intrinsic::riscv_vlseg8_mask: {
2171 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2172 /*IsStrided*/ false);
2173 return;
2174 }
2175 case Intrinsic::riscv_vlsseg2:
2176 case Intrinsic::riscv_vlsseg3:
2177 case Intrinsic::riscv_vlsseg4:
2178 case Intrinsic::riscv_vlsseg5:
2179 case Intrinsic::riscv_vlsseg6:
2180 case Intrinsic::riscv_vlsseg7:
2181 case Intrinsic::riscv_vlsseg8: {
2182 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2183 /*IsStrided*/ true);
2184 return;
2185 }
2186 case Intrinsic::riscv_vlsseg2_mask:
2187 case Intrinsic::riscv_vlsseg3_mask:
2188 case Intrinsic::riscv_vlsseg4_mask:
2189 case Intrinsic::riscv_vlsseg5_mask:
2190 case Intrinsic::riscv_vlsseg6_mask:
2191 case Intrinsic::riscv_vlsseg7_mask:
2192 case Intrinsic::riscv_vlsseg8_mask: {
2193 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2194 /*IsStrided*/ true);
2195 return;
2196 }
2197 case Intrinsic::riscv_vloxseg2:
2198 case Intrinsic::riscv_vloxseg3:
2199 case Intrinsic::riscv_vloxseg4:
2200 case Intrinsic::riscv_vloxseg5:
2201 case Intrinsic::riscv_vloxseg6:
2202 case Intrinsic::riscv_vloxseg7:
2203 case Intrinsic::riscv_vloxseg8:
2204 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2205 /*IsOrdered*/ true);
2206 return;
2207 case Intrinsic::riscv_vluxseg2:
2208 case Intrinsic::riscv_vluxseg3:
2209 case Intrinsic::riscv_vluxseg4:
2210 case Intrinsic::riscv_vluxseg5:
2211 case Intrinsic::riscv_vluxseg6:
2212 case Intrinsic::riscv_vluxseg7:
2213 case Intrinsic::riscv_vluxseg8:
2214 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2215 /*IsOrdered*/ false);
2216 return;
2217 case Intrinsic::riscv_vloxseg2_mask:
2218 case Intrinsic::riscv_vloxseg3_mask:
2219 case Intrinsic::riscv_vloxseg4_mask:
2220 case Intrinsic::riscv_vloxseg5_mask:
2221 case Intrinsic::riscv_vloxseg6_mask:
2222 case Intrinsic::riscv_vloxseg7_mask:
2223 case Intrinsic::riscv_vloxseg8_mask:
2224 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2225 /*IsOrdered*/ true);
2226 return;
2227 case Intrinsic::riscv_vluxseg2_mask:
2228 case Intrinsic::riscv_vluxseg3_mask:
2229 case Intrinsic::riscv_vluxseg4_mask:
2230 case Intrinsic::riscv_vluxseg5_mask:
2231 case Intrinsic::riscv_vluxseg6_mask:
2232 case Intrinsic::riscv_vluxseg7_mask:
2233 case Intrinsic::riscv_vluxseg8_mask:
2234 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2235 /*IsOrdered*/ false);
2236 return;
2237 case Intrinsic::riscv_vlseg8ff:
2238 case Intrinsic::riscv_vlseg7ff:
2239 case Intrinsic::riscv_vlseg6ff:
2240 case Intrinsic::riscv_vlseg5ff:
2241 case Intrinsic::riscv_vlseg4ff:
2242 case Intrinsic::riscv_vlseg3ff:
2243 case Intrinsic::riscv_vlseg2ff: {
2244 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2245 return;
2246 }
2247 case Intrinsic::riscv_vlseg8ff_mask:
2248 case Intrinsic::riscv_vlseg7ff_mask:
2249 case Intrinsic::riscv_vlseg6ff_mask:
2250 case Intrinsic::riscv_vlseg5ff_mask:
2251 case Intrinsic::riscv_vlseg4ff_mask:
2252 case Intrinsic::riscv_vlseg3ff_mask:
2253 case Intrinsic::riscv_vlseg2ff_mask: {
2254 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2255 return;
2256 }
2257 case Intrinsic::riscv_vloxei:
2258 case Intrinsic::riscv_vloxei_mask:
2259 case Intrinsic::riscv_vluxei:
2260 case Intrinsic::riscv_vluxei_mask: {
2261 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2262 IntNo == Intrinsic::riscv_vluxei_mask;
2263 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2264 IntNo == Intrinsic::riscv_vloxei_mask;
2265
2266 MVT VT = Node->getSimpleValueType(0);
2267 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2268
2269 unsigned CurOp = 2;
2270 SmallVector<SDValue, 8> Operands;
2271 Operands.push_back(Node->getOperand(CurOp++));
2272
2273 MVT IndexVT;
2274 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2275 /*IsStridedOrIndexed*/ true, Operands,
2276 /*IsLoad=*/true, &IndexVT);
2277
2279 "Element count mismatch");
2280
2283 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2284 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2285 reportFatalUsageError("The V extension does not support EEW=64 for "
2286 "index values when XLEN=32");
2287 }
2288 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2289 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2290 static_cast<unsigned>(IndexLMUL));
2291 MachineSDNode *Load =
2292 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2293
2294 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2295
2296 ReplaceNode(Node, Load);
2297 return;
2298 }
2299 case Intrinsic::riscv_vlm:
2300 case Intrinsic::riscv_vle:
2301 case Intrinsic::riscv_vle_mask:
2302 case Intrinsic::riscv_vlse:
2303 case Intrinsic::riscv_vlse_mask: {
2304 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2305 IntNo == Intrinsic::riscv_vlse_mask;
2306 bool IsStrided =
2307 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2308
2309 MVT VT = Node->getSimpleValueType(0);
2310 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2311
2312 // The riscv_vlm intrinsic are always tail agnostic and no passthru
2313 // operand at the IR level. In pseudos, they have both policy and
2314 // passthru operand. The passthru operand is needed to track the
2315 // "tail undefined" state, and the policy is there just for
2316 // for consistency - it will always be "don't care" for the
2317 // unmasked form.
2318 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2319 unsigned CurOp = 2;
2320 SmallVector<SDValue, 8> Operands;
2321 if (HasPassthruOperand)
2322 Operands.push_back(Node->getOperand(CurOp++));
2323 else {
2324 // We eagerly lower to implicit_def (instead of undef), as we
2325 // otherwise fail to select nodes such as: nxv1i1 = undef
2326 SDNode *Passthru =
2327 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2328 Operands.push_back(SDValue(Passthru, 0));
2329 }
2330 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2331 Operands, /*IsLoad=*/true);
2332
2334 const RISCV::VLEPseudo *P =
2335 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2336 static_cast<unsigned>(LMUL));
2337 MachineSDNode *Load =
2338 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2339
2340 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2341
2342 ReplaceNode(Node, Load);
2343 return;
2344 }
2345 case Intrinsic::riscv_vleff:
2346 case Intrinsic::riscv_vleff_mask: {
2347 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2348
2349 MVT VT = Node->getSimpleValueType(0);
2350 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2351
2352 unsigned CurOp = 2;
2353 SmallVector<SDValue, 7> Operands;
2354 Operands.push_back(Node->getOperand(CurOp++));
2355 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2356 /*IsStridedOrIndexed*/ false, Operands,
2357 /*IsLoad=*/true);
2358
2360 const RISCV::VLEPseudo *P =
2361 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2362 Log2SEW, static_cast<unsigned>(LMUL));
2363 MachineSDNode *Load = CurDAG->getMachineNode(
2364 P->Pseudo, DL, Node->getVTList(), Operands);
2365 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2366
2367 ReplaceNode(Node, Load);
2368 return;
2369 }
2370 case Intrinsic::riscv_nds_vln:
2371 case Intrinsic::riscv_nds_vln_mask:
2372 case Intrinsic::riscv_nds_vlnu:
2373 case Intrinsic::riscv_nds_vlnu_mask: {
2374 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2375 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2376 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2377 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2378
2379 MVT VT = Node->getSimpleValueType(0);
2380 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2381 unsigned CurOp = 2;
2382 SmallVector<SDValue, 8> Operands;
2383
2384 Operands.push_back(Node->getOperand(CurOp++));
2385 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2386 /*IsStridedOrIndexed=*/false, Operands,
2387 /*IsLoad=*/true);
2388
2390 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2391 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2392 MachineSDNode *Load =
2393 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2394
2395 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2396 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2397
2398 ReplaceNode(Node, Load);
2399 return;
2400 }
2401 }
2402 break;
2403 }
2404 case ISD::INTRINSIC_VOID: {
2405 unsigned IntNo = Node->getConstantOperandVal(1);
2406 switch (IntNo) {
2407 case Intrinsic::riscv_vsseg2:
2408 case Intrinsic::riscv_vsseg3:
2409 case Intrinsic::riscv_vsseg4:
2410 case Intrinsic::riscv_vsseg5:
2411 case Intrinsic::riscv_vsseg6:
2412 case Intrinsic::riscv_vsseg7:
2413 case Intrinsic::riscv_vsseg8: {
2414 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2415 /*IsStrided*/ false);
2416 return;
2417 }
2418 case Intrinsic::riscv_vsseg2_mask:
2419 case Intrinsic::riscv_vsseg3_mask:
2420 case Intrinsic::riscv_vsseg4_mask:
2421 case Intrinsic::riscv_vsseg5_mask:
2422 case Intrinsic::riscv_vsseg6_mask:
2423 case Intrinsic::riscv_vsseg7_mask:
2424 case Intrinsic::riscv_vsseg8_mask: {
2425 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2426 /*IsStrided*/ false);
2427 return;
2428 }
2429 case Intrinsic::riscv_vssseg2:
2430 case Intrinsic::riscv_vssseg3:
2431 case Intrinsic::riscv_vssseg4:
2432 case Intrinsic::riscv_vssseg5:
2433 case Intrinsic::riscv_vssseg6:
2434 case Intrinsic::riscv_vssseg7:
2435 case Intrinsic::riscv_vssseg8: {
2436 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2437 /*IsStrided*/ true);
2438 return;
2439 }
2440 case Intrinsic::riscv_vssseg2_mask:
2441 case Intrinsic::riscv_vssseg3_mask:
2442 case Intrinsic::riscv_vssseg4_mask:
2443 case Intrinsic::riscv_vssseg5_mask:
2444 case Intrinsic::riscv_vssseg6_mask:
2445 case Intrinsic::riscv_vssseg7_mask:
2446 case Intrinsic::riscv_vssseg8_mask: {
2447 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2448 /*IsStrided*/ true);
2449 return;
2450 }
2451 case Intrinsic::riscv_vsoxseg2:
2452 case Intrinsic::riscv_vsoxseg3:
2453 case Intrinsic::riscv_vsoxseg4:
2454 case Intrinsic::riscv_vsoxseg5:
2455 case Intrinsic::riscv_vsoxseg6:
2456 case Intrinsic::riscv_vsoxseg7:
2457 case Intrinsic::riscv_vsoxseg8:
2458 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2459 /*IsOrdered*/ true);
2460 return;
2461 case Intrinsic::riscv_vsuxseg2:
2462 case Intrinsic::riscv_vsuxseg3:
2463 case Intrinsic::riscv_vsuxseg4:
2464 case Intrinsic::riscv_vsuxseg5:
2465 case Intrinsic::riscv_vsuxseg6:
2466 case Intrinsic::riscv_vsuxseg7:
2467 case Intrinsic::riscv_vsuxseg8:
2468 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2469 /*IsOrdered*/ false);
2470 return;
2471 case Intrinsic::riscv_vsoxseg2_mask:
2472 case Intrinsic::riscv_vsoxseg3_mask:
2473 case Intrinsic::riscv_vsoxseg4_mask:
2474 case Intrinsic::riscv_vsoxseg5_mask:
2475 case Intrinsic::riscv_vsoxseg6_mask:
2476 case Intrinsic::riscv_vsoxseg7_mask:
2477 case Intrinsic::riscv_vsoxseg8_mask:
2478 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2479 /*IsOrdered*/ true);
2480 return;
2481 case Intrinsic::riscv_vsuxseg2_mask:
2482 case Intrinsic::riscv_vsuxseg3_mask:
2483 case Intrinsic::riscv_vsuxseg4_mask:
2484 case Intrinsic::riscv_vsuxseg5_mask:
2485 case Intrinsic::riscv_vsuxseg6_mask:
2486 case Intrinsic::riscv_vsuxseg7_mask:
2487 case Intrinsic::riscv_vsuxseg8_mask:
2488 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2489 /*IsOrdered*/ false);
2490 return;
2491 case Intrinsic::riscv_vsoxei:
2492 case Intrinsic::riscv_vsoxei_mask:
2493 case Intrinsic::riscv_vsuxei:
2494 case Intrinsic::riscv_vsuxei_mask: {
2495 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2496 IntNo == Intrinsic::riscv_vsuxei_mask;
2497 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2498 IntNo == Intrinsic::riscv_vsoxei_mask;
2499
2500 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2501 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2502
2503 unsigned CurOp = 2;
2504 SmallVector<SDValue, 8> Operands;
2505 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2506
2507 MVT IndexVT;
2508 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2509 /*IsStridedOrIndexed*/ true, Operands,
2510 /*IsLoad=*/false, &IndexVT);
2511
2513 "Element count mismatch");
2514
2517 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2518 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2519 reportFatalUsageError("The V extension does not support EEW=64 for "
2520 "index values when XLEN=32");
2521 }
2522 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2523 IsMasked, IsOrdered, IndexLog2EEW,
2524 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2525 MachineSDNode *Store =
2526 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2527
2528 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2529
2530 ReplaceNode(Node, Store);
2531 return;
2532 }
2533 case Intrinsic::riscv_vsm:
2534 case Intrinsic::riscv_vse:
2535 case Intrinsic::riscv_vse_mask:
2536 case Intrinsic::riscv_vsse:
2537 case Intrinsic::riscv_vsse_mask: {
2538 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2539 IntNo == Intrinsic::riscv_vsse_mask;
2540 bool IsStrided =
2541 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2542
2543 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2544 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2545
2546 unsigned CurOp = 2;
2547 SmallVector<SDValue, 8> Operands;
2548 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2549
2550 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2551 Operands);
2552
2554 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2555 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2556 MachineSDNode *Store =
2557 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2558 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2559
2560 ReplaceNode(Node, Store);
2561 return;
2562 }
2563 case Intrinsic::riscv_sf_vc_x_se:
2564 case Intrinsic::riscv_sf_vc_i_se:
2566 return;
2567 case Intrinsic::riscv_sf_vlte8:
2568 case Intrinsic::riscv_sf_vlte16:
2569 case Intrinsic::riscv_sf_vlte32:
2570 case Intrinsic::riscv_sf_vlte64: {
2571 unsigned Log2SEW;
2572 unsigned PseudoInst;
2573 switch (IntNo) {
2574 case Intrinsic::riscv_sf_vlte8:
2575 PseudoInst = RISCV::PseudoSF_VLTE8;
2576 Log2SEW = 3;
2577 break;
2578 case Intrinsic::riscv_sf_vlte16:
2579 PseudoInst = RISCV::PseudoSF_VLTE16;
2580 Log2SEW = 4;
2581 break;
2582 case Intrinsic::riscv_sf_vlte32:
2583 PseudoInst = RISCV::PseudoSF_VLTE32;
2584 Log2SEW = 5;
2585 break;
2586 case Intrinsic::riscv_sf_vlte64:
2587 PseudoInst = RISCV::PseudoSF_VLTE64;
2588 Log2SEW = 6;
2589 break;
2590 }
2591
2592 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2593 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2594 SDValue Operands[] = {Node->getOperand(2),
2595 Node->getOperand(3),
2596 Node->getOperand(4),
2597 SEWOp,
2598 TWidenOp,
2599 Node->getOperand(0)};
2600
2601 MachineSDNode *TileLoad =
2602 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2603 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2604 CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()});
2605
2606 ReplaceNode(Node, TileLoad);
2607 return;
2608 }
2609 case Intrinsic::riscv_sf_mm_s_s:
2610 case Intrinsic::riscv_sf_mm_s_u:
2611 case Intrinsic::riscv_sf_mm_u_s:
2612 case Intrinsic::riscv_sf_mm_u_u:
2613 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2614 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2615 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2616 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2617 case Intrinsic::riscv_sf_mm_f_f: {
2618 bool HasFRM = false;
2619 unsigned PseudoInst;
2620 switch (IntNo) {
2621 case Intrinsic::riscv_sf_mm_s_s:
2622 PseudoInst = RISCV::PseudoSF_MM_S_S;
2623 break;
2624 case Intrinsic::riscv_sf_mm_s_u:
2625 PseudoInst = RISCV::PseudoSF_MM_S_U;
2626 break;
2627 case Intrinsic::riscv_sf_mm_u_s:
2628 PseudoInst = RISCV::PseudoSF_MM_U_S;
2629 break;
2630 case Intrinsic::riscv_sf_mm_u_u:
2631 PseudoInst = RISCV::PseudoSF_MM_U_U;
2632 break;
2633 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2634 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2635 HasFRM = true;
2636 break;
2637 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2638 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2639 HasFRM = true;
2640 break;
2641 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2642 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2643 HasFRM = true;
2644 break;
2645 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2646 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2647 HasFRM = true;
2648 break;
2649 case Intrinsic::riscv_sf_mm_f_f:
2650 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2651 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2652 else
2653 PseudoInst = RISCV::PseudoSF_MM_F_F;
2654 HasFRM = true;
2655 break;
2656 }
2657 uint64_t TileNum = Node->getConstantOperandVal(2);
2658 SDValue Op1 = Node->getOperand(3);
2659 SDValue Op2 = Node->getOperand(4);
2660 MVT VT = Op1->getSimpleValueType(0);
2661 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2662 SDValue TmOp = Node->getOperand(5);
2663 SDValue TnOp = Node->getOperand(6);
2664 SDValue TkOp = Node->getOperand(7);
2665 SDValue TWidenOp = Node->getOperand(8);
2666 SDValue Chain = Node->getOperand(0);
2667
2668 // sf.mm.f.f with sew=32, twiden=2 is invalid
2669 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2670 TWidenOp->getAsZExtVal() == 2)
2671 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2672
2673 SmallVector<SDValue, 10> Operands(
2674 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2675 if (HasFRM)
2676 Operands.push_back(
2677 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2678 Operands.append({TmOp, TnOp, TkOp,
2679 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2680 Chain});
2681
2682 auto *NewNode =
2683 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2684
2685 ReplaceNode(Node, NewNode);
2686 return;
2687 }
2688 case Intrinsic::riscv_sf_vtzero_t: {
2689 uint64_t TileNum = Node->getConstantOperandVal(2);
2690 SDValue Tm = Node->getOperand(3);
2691 SDValue Tn = Node->getOperand(4);
2692 SDValue Log2SEW = Node->getOperand(5);
2693 SDValue TWiden = Node->getOperand(6);
2694 SDValue Chain = Node->getOperand(0);
2695 auto *NewNode = CurDAG->getMachineNode(
2696 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2697 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2698 TWiden, Chain});
2699
2700 ReplaceNode(Node, NewNode);
2701 return;
2702 }
2703 }
2704 break;
2705 }
2706 case ISD::BITCAST: {
2707 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2708 // Just drop bitcasts between vectors if both are fixed or both are
2709 // scalable.
2710 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2711 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2712 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2713 CurDAG->RemoveDeadNode(Node);
2714 return;
2715 }
2716 if (Subtarget->enablePExtCodeGen()) {
2717 bool Is32BitCast =
2718 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2719 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2720 bool Is64BitCast =
2721 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2722 SrcVT == MVT::v2i32)) ||
2723 (SrcVT == MVT::i64 &&
2724 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2725 if (Is32BitCast || Is64BitCast) {
2726 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2727 CurDAG->RemoveDeadNode(Node);
2728 return;
2729 }
2730 }
2731 break;
2732 }
2734 if (Subtarget->enablePExtCodeGen()) {
2735 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2736 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
2737 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
2738 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2739 CurDAG->RemoveDeadNode(Node);
2740 return;
2741 }
2742 }
2743 break;
2745 case RISCVISD::TUPLE_INSERT: {
2746 SDValue V = Node->getOperand(0);
2747 SDValue SubV = Node->getOperand(1);
2748 SDLoc DL(SubV);
2749 auto Idx = Node->getConstantOperandVal(2);
2750 MVT SubVecVT = SubV.getSimpleValueType();
2751
2752 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2753 MVT SubVecContainerVT = SubVecVT;
2754 // Establish the correct scalable-vector types for any fixed-length type.
2755 if (SubVecVT.isFixedLengthVector()) {
2756 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2758 [[maybe_unused]] bool ExactlyVecRegSized =
2759 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2760 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2761 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2762 .getKnownMinValue()));
2763 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2764 }
2765 MVT ContainerVT = VT;
2766 if (VT.isFixedLengthVector())
2767 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2768
2769 const auto *TRI = Subtarget->getRegisterInfo();
2770 unsigned SubRegIdx;
2771 std::tie(SubRegIdx, Idx) =
2773 ContainerVT, SubVecContainerVT, Idx, TRI);
2774
2775 // If the Idx hasn't been completely eliminated then this is a subvector
2776 // insert which doesn't naturally align to a vector register. These must
2777 // be handled using instructions to manipulate the vector registers.
2778 if (Idx != 0)
2779 break;
2780
2781 RISCVVType::VLMUL SubVecLMUL =
2782 RISCVTargetLowering::getLMUL(SubVecContainerVT);
2783 [[maybe_unused]] bool IsSubVecPartReg =
2784 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2785 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2786 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2787 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2788 V.isUndef()) &&
2789 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2790 "the subvector is smaller than a full-sized register");
2791
2792 // If we haven't set a SubRegIdx, then we must be going between
2793 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2794 if (SubRegIdx == RISCV::NoSubRegister) {
2795 unsigned InRegClassID =
2798 InRegClassID &&
2799 "Unexpected subvector extraction");
2800 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2801 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2802 DL, VT, SubV, RC);
2803 ReplaceNode(Node, NewNode);
2804 return;
2805 }
2806
2807 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2808 ReplaceNode(Node, Insert.getNode());
2809 return;
2810 }
2812 case RISCVISD::TUPLE_EXTRACT: {
2813 SDValue V = Node->getOperand(0);
2814 auto Idx = Node->getConstantOperandVal(1);
2815 MVT InVT = V.getSimpleValueType();
2816 SDLoc DL(V);
2817
2818 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2819 MVT SubVecContainerVT = VT;
2820 // Establish the correct scalable-vector types for any fixed-length type.
2821 if (VT.isFixedLengthVector()) {
2822 assert(Idx == 0);
2823 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2824 }
2825 if (InVT.isFixedLengthVector())
2826 InVT = TLI.getContainerForFixedLengthVector(InVT);
2827
2828 const auto *TRI = Subtarget->getRegisterInfo();
2829 unsigned SubRegIdx;
2830 std::tie(SubRegIdx, Idx) =
2832 InVT, SubVecContainerVT, Idx, TRI);
2833
2834 // If the Idx hasn't been completely eliminated then this is a subvector
2835 // extract which doesn't naturally align to a vector register. These must
2836 // be handled using instructions to manipulate the vector registers.
2837 if (Idx != 0)
2838 break;
2839
2840 // If we haven't set a SubRegIdx, then we must be going between
2841 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2842 if (SubRegIdx == RISCV::NoSubRegister) {
2843 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2845 InRegClassID &&
2846 "Unexpected subvector extraction");
2847 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2848 SDNode *NewNode =
2849 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2850 ReplaceNode(Node, NewNode);
2851 return;
2852 }
2853
2854 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2855 ReplaceNode(Node, Extract.getNode());
2856 return;
2857 }
2858 case RISCVISD::VMV_S_X_VL:
2859 case RISCVISD::VFMV_S_F_VL:
2860 case RISCVISD::VMV_V_X_VL:
2861 case RISCVISD::VFMV_V_F_VL: {
2862 // Try to match splat of a scalar load to a strided load with stride of x0.
2863 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2864 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2865 if (!Node->getOperand(0).isUndef())
2866 break;
2867 SDValue Src = Node->getOperand(1);
2868 auto *Ld = dyn_cast<LoadSDNode>(Src);
2869 // Can't fold load update node because the second
2870 // output is used so that load update node can't be removed.
2871 if (!Ld || Ld->isIndexed())
2872 break;
2873 EVT MemVT = Ld->getMemoryVT();
2874 // The memory VT should be the same size as the element type.
2875 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2876 break;
2877 if (!IsProfitableToFold(Src, Node, Node) ||
2878 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2879 break;
2880
2881 SDValue VL;
2882 if (IsScalarMove) {
2883 // We could deal with more VL if we update the VSETVLI insert pass to
2884 // avoid introducing more VSETVLI.
2885 if (!isOneConstant(Node->getOperand(2)))
2886 break;
2887 selectVLOp(Node->getOperand(2), VL);
2888 } else
2889 selectVLOp(Node->getOperand(2), VL);
2890
2891 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2892 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2893
2894 // If VL=1, then we don't need to do a strided load and can just do a
2895 // regular load.
2896 bool IsStrided = !isOneConstant(VL);
2897
2898 // Only do a strided load if we have optimized zero-stride vector load.
2899 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2900 break;
2901
2902 SmallVector<SDValue> Operands = {
2903 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2904 Ld->getBasePtr()};
2905 if (IsStrided)
2906 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2908 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2909 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2910
2912 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2913 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2914 Log2SEW, static_cast<unsigned>(LMUL));
2915 MachineSDNode *Load =
2916 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2917 // Update the chain.
2918 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2919 // Record the mem-refs
2920 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2921 // Replace the splat with the vlse.
2922 ReplaceNode(Node, Load);
2923 return;
2924 }
2925 case ISD::PREFETCH:
2926 unsigned Locality = Node->getConstantOperandVal(3);
2927 if (Locality > 2)
2928 break;
2929
2930 auto *LoadStoreMem = cast<MemSDNode>(Node);
2931 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2933
2934 int NontemporalLevel = 0;
2935 switch (Locality) {
2936 case 0:
2937 NontemporalLevel = 3; // NTL.ALL
2938 break;
2939 case 1:
2940 NontemporalLevel = 1; // NTL.PALL
2941 break;
2942 case 2:
2943 NontemporalLevel = 0; // NTL.P1
2944 break;
2945 default:
2946 llvm_unreachable("unexpected locality value.");
2947 }
2948
2949 if (NontemporalLevel & 0b1)
2951 if (NontemporalLevel & 0b10)
2953 break;
2954 }
2955
2956 // Select the default instruction.
2957 SelectCode(Node);
2958}
2959
// RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand (continuation).
// NOTE(review): this extraction is missing original line 2960 (the defining
// line) and the case labels at lines 2966-2967 and 2975 — presumably the
// memory-constraint ConstraintCode cases; confirm against upstream source.
// Produces a (register, immediate) operand pair for an inline-asm memory
// constraint, as expected by RISCVAsmPrinter::PrintAsmMemoryOperand.
2961 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2962 std::vector<SDValue> &OutOps) {
2963 // Always produce a register and immediate operand, as expected by
2964 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2965 switch (ConstraintID) {
// (missing case label) reg+imm form: reuse the normal address selector.
2968 SDValue Op0, Op1;
2969 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2970 assert(Found && "SelectAddrRegImm should always succeed");
2971 OutOps.push_back(Op0);
2972 OutOps.push_back(Op1);
// Returning false signals the operand was successfully selected.
2973 return false;
2974 }
// (missing case label) register-only form: emit the address with a 0 offset.
2976 OutOps.push_back(Op);
2977 OutOps.push_back(
2978 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2979 return false;
2980 default:
2981 report_fatal_error("Unexpected asm memory constraint " +
2982 InlineAsm::getMemConstraintName(ConstraintID));
2983 }
2984
// Unreachable: every switch arm returns or aborts above.
2985 return true;
2986}
2987
// RISCVDAGToDAGISel::SelectAddrFrameIndex (continuation; original defining
// line 2988 is missing from this extraction).
// Matches a bare FrameIndex address: Base becomes the target frame index,
// Offset a constant 0 of XLen type. Returns false for any other address.
2989 SDValue &Offset) {
2990 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2991 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2992 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2993 return true;
2994 }
2995
2996 return false;
2997 }
2998
2999// Fold constant addresses.
// Splits a constant address into Base + simm12 Offset. If the whole value is
// simm12, Base is X0; if it fits LUI+simm12, Base is a LUI; otherwise the
// materialization sequence (minus a trailing ADDI) becomes the Base.
// When IsPrefetch is set, the folded offset must have its low 5 bits clear
// (prefetch encodings drop imm[4:0]), so misaligned offsets are rejected.
// NOTE(review): original line 3002 (the Addr/Base/Offset parameters) is
// missing from this extraction — confirm the full signature upstream.
3000 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3001 const MVT VT, const RISCVSubtarget *Subtarget,
3003 bool IsPrefetch = false) {
3004 if (!isa<ConstantSDNode>(Addr))
3005 return false;
3006
3007 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3008
3009 // If the constant is a simm12, we can fold the whole constant and use X0 as
3010 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3011 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3012 int64_t Lo12 = SignExtend64<12>(CVal);
// Hi is CVal with the sign-extended low 12 bits removed; if it fits in 32
// bits (or we're on RV32) a single LUI can produce it.
3013 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3014 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3015 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3016 return false;
3017 if (Hi) {
3018 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3019 Base = SDValue(
3020 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3021 CurDAG->getTargetConstant(Hi20, DL, VT)),
3022 0);
3023 } else {
3024 Base = CurDAG->getRegister(RISCV::X0, VT);
3025 }
3026 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3027 return true;
3028 }
3029
3030 // Ask how constant materialization would handle this constant.
3031 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3032
3033 // If the last instruction would be an ADDI, we can fold its immediate and
3034 // emit the rest of the sequence as the base.
3035 if (Seq.back().getOpcode() != RISCV::ADDI)
3036 return false;
3037 Lo12 = Seq.back().getImm();
3038 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3039 return false;
3040
3041 // Drop the last instruction.
3042 Seq.pop_back();
3043 assert(!Seq.empty() && "Expected more instructions in sequence");
3044
// Emit the remaining materialization sequence as the base register value.
3045 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3046 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3047 return true;
3048 }
3049
3050// Is this ADD instruction only used as the base pointer of scalar loads and
3051// stores?
// Returns true only if every user of Add is a plain (or RV32 paired, or
// atomic) scalar load/store that uses Add purely as its address operand.
// NOTE(review): original line 3052 (the defining line, presumably
// `static bool isWorthFoldingAdd(SDValue Add) {`) is missing from this
// extraction.
3053 for (auto *User : Add->users()) {
3054 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3055 User->getOpcode() != RISCVISD::LD_RV32 &&
3056 User->getOpcode() != RISCVISD::SD_RV32 &&
3057 User->getOpcode() != ISD::ATOMIC_LOAD &&
3058 User->getOpcode() != ISD::ATOMIC_STORE)
3059 return false;
// Only scalar integer and f16/f32/f64 memory types qualify.
3060 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3061 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3062 VT != MVT::f64)
3063 return false;
3064 // Don't allow stores of the value. It must be used as the address.
3065 if (User->getOpcode() == ISD::STORE &&
3066 cast<StoreSDNode>(User)->getValue() == Add)
3067 return false;
3068 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3069 cast<AtomicSDNode>(User)->getVal() == Add)
3070 return false;
// SD_RV32 stores a register pair in operands 0/1; neither may be the value.
3071 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3072 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3073 return false;
// Presumably atomics stronger than monotonic lower to forms that cannot
// fold an immediate offset — TODO confirm against the atomic lowering.
3074 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3075 return false;
3076 }
3077
3078 return true;
3079 }
3080
// Returns true if User is a load/store (including RV32 pair and atomic
// forms) that uses Add only as its address, never as the stored value.
// NOTE(review): original line 3081 (the defining line with the User/Add
// parameters) is missing from this extraction.
3082 switch (User->getOpcode()) {
3083 default:
3084 return false;
3085 case ISD::LOAD:
3086 case RISCVISD::LD_RV32:
3087 case ISD::ATOMIC_LOAD:
// Loads always use the operand as an address.
3088 break;
3089 case ISD::STORE:
3090 // Don't allow stores of Add. It must only be used as the address.
3091 if (cast<StoreSDNode>(User)->getValue() == Add)
3092 return false;
3093 break;
3094 case RISCVISD::SD_RV32:
3095 // Don't allow stores of Add. It must only be used as the address.
3096 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3097 return false;
3098 break;
3099 case ISD::ATOMIC_STORE:
3100 // Don't allow stores of Add. It must only be used as the address.
3101 if (cast<AtomicSDNode>(User)->getVal() == Add)
3102 return false;
3103 break;
3104 }
3105
3106 return true;
3107 }
3108
3109// To prevent SelectAddrRegImm from folding offsets that conflict with the
3110// fusion of PseudoMovAddr, check if the offset of every use of a given address
3111// is within the alignment.
// NOTE(review): original line 3112 (the defining line taking Addr and
// Alignment) is missing from this extraction. Addr must be a RISCVISD::ADD_LO
// node (asserted below).
3113 Align Alignment) {
3114 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3115 for (auto *User : Addr->users()) {
3116 // If the user is a load or store, then the offset is 0 which is always
3117 // within alignment.
3118 if (isRegImmLoadOrStore(User, Addr))
3119 continue;
3120
// Otherwise the user must be (Addr + simm12) whose offset stays strictly
// below the alignment, so the low part cannot carry into the high part.
3121 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3122 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3123 if (!isInt<12>(CVal) || Alignment <= CVal)
3124 return false;
3125
3126 // Make sure all uses are foldable load/stores.
3127 for (auto *AddUser : User->users())
3128 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3129 return false;
3130
3131 continue;
3132 }
3133
// Any other kind of user defeats the fold.
3134 return false;
3135 }
3136
3137 return true;
3138 }
3139
// RISCVDAGToDAGISel::SelectAddrRegImm (continuation; original defining line
// 3140 is missing from this extraction).
// Selects Addr as Base + simm12 Offset, trying in order: frame index,
// (ADD_LO hi, lo) global addressing, base+simm12, ADD with large constant
// (AddiPair split or Lo12 fold), pure constant address, and finally the bare
// address with a 0 offset. Always returns true.
3141 SDValue &Offset) {
3142 if (SelectAddrFrameIndex(Addr, Base, Offset))
3143 return true;
3144
3145 SDLoc DL(Addr);
3146 MVT VT = Addr.getSimpleValueType();
3147
3148 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3149 bool CanFold = true;
3150 // Unconditionally fold if operand 1 is not a global address (e.g.
3151 // externsymbol)
3152 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3153 const DataLayout &DL = CurDAG->getDataLayout();
3154 Align Alignment = commonAlignment(
3155 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
// Folding here would block PseudoMovAddr fusion if some other use adds
// an offset outside the global's alignment margin.
3156 if (!areOffsetsWithinAlignment(Addr, Alignment))
3157 CanFold = false;
3158 }
3159 if (CanFold) {
3160 Base = Addr.getOperand(0);
3161 Offset = Addr.getOperand(1);
3162 return true;
3163 }
3164 }
3165
3166 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3167 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3168 if (isInt<12>(CVal)) {
3169 Base = Addr.getOperand(0);
3170 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3171 SDValue LoOperand = Base.getOperand(1);
3172 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3173 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3174 // (its low part, really), then we can rely on the alignment of that
3175 // variable to provide a margin of safety before low part can overflow
3176 // the 12 bits of the load/store offset. Check if CVal falls within
3177 // that margin; if so (low part + CVal) can't overflow.
3178 const DataLayout &DL = CurDAG->getDataLayout();
3179 Align Alignment = commonAlignment(
3180 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3181 if ((CVal == 0 || Alignment > CVal) &&
3182 areOffsetsWithinAlignment(Base, Alignment)) {
// Fold the constant into the global's target offset directly.
3183 int64_t CombinedOffset = CVal + GA->getOffset();
3184 Base = Base.getOperand(0);
3185 Offset = CurDAG->getTargetGlobalAddress(
3186 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3187 CombinedOffset, GA->getTargetFlags());
3188 return true;
3189 }
3190 }
3191 }
3192
3193 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3194 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3195 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3196 return true;
3197 }
3198 }
3199
3200 // Handle ADD with large immediates.
3201 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3202 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3203 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3204
3205 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3206 // an ADDI for part of the offset and fold the rest into the load/store.
3207 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
3208 if (CVal >= -4096 && CVal <= 4094) {
// Adj is the largest-magnitude simm12 half; the remainder is also simm12.
3209 int64_t Adj = CVal < 0 ? -2048 : 2047;
3210 Base = SDValue(
3211 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3212 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3213 0);
3214 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3215 return true;
3216 }
3217
3218 // For larger immediates, we might be able to save one instruction from
3219 // constant materialization by folding the Lo12 bits of the immediate into
3220 // the address. We should only do this if the ADD is only used by loads and
3221 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3222 // separately with the full materialized immediate creating extra
3223 // instructions.
3224 if (isWorthFoldingAdd(Addr) &&
3225 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3226 Offset, /*IsPrefetch=*/false)) {
3227 // Insert an ADD instruction with the materialized Hi52 bits.
3228 Base = SDValue(
3229 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3230 0);
3231 return true;
3232 }
3233 }
3234
3235 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3236 /*IsPrefetch=*/false))
3237 return true;
3238
// Fallback: the whole address in a register, offset 0.
3239 Base = Addr;
3240 Offset = CurDAG->getTargetConstant(0, DL, VT);
3241 return true;
3242 }
3243
3244/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
// (Continuation; original defining line 3245 is missing from this
// extraction.) Always returns true: either base + uimm9 constant offset, or
// the whole address with a 0 offset.
3246 SDValue &Offset) {
3247 if (SelectAddrFrameIndex(Addr, Base, Offset))
3248 return true;
3249
3250 SDLoc DL(Addr);
3251 MVT VT = Addr.getSimpleValueType();
3252
3253 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3254 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
// Only unsigned 9-bit offsets are encodable here.
3255 if (isUInt<9>(CVal)) {
3256 Base = Addr.getOperand(0);
3257
3258 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3259 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3260 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3261 return true;
3262 }
3263 }
3264
3265 Base = Addr;
3266 Offset = CurDAG->getTargetConstant(0, DL, VT);
3267 return true;
3268 }
3269
3270/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3271/// Offset should be all zeros.
// (Continuation; original defining line 3272 is missing from this
// extraction.) Used for prefetch-style addressing where imm[4:0] is dropped
// by the encoding; selectConstantAddr is invoked with IsPrefetch=true so it
// enforces the same low-5-bits-zero rule. Always returns true.
3273 SDValue &Offset) {
3274 if (SelectAddrFrameIndex(Addr, Base, Offset))
3275 return true;
3276
3277 SDLoc DL(Addr);
3278 MVT VT = Addr.getSimpleValueType();
3279
3280 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3281 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3282 if (isInt<12>(CVal)) {
3283 Base = Addr.getOperand(0);
3284
3285 // Early-out if not a valid offset.
// A misaligned simm12 cannot be folded at all: keep the full address
// with a zero offset rather than splitting it.
3286 if ((CVal & 0b11111) != 0) {
3287 Base = Addr;
3288 Offset = CurDAG->getTargetConstant(0, DL, VT);
3289 return true;
3290 }
3291
3292 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3293 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3294 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3295 return true;
3296 }
3297 }
3298
3299 // Handle ADD with large immediates.
3300 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3301 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3302 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3303
3304 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3305 // one instruction by folding adjustment (-2048 or 2016) into the address.
// Note the folded Offset here is the adjustment itself (a multiple of
// 32), and the ADDI carries the remainder — the reverse of the split in
// SelectAddrRegImm.
3306 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3307 int64_t Adj = CVal < 0 ? -2048 : 2016;
3308 int64_t AdjustedOffset = CVal - Adj;
3309 Base =
3310 SDValue(CurDAG->getMachineNode(
3311 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3312 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3313 0);
3314 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3315 return true;
3316 }
3317
3318 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3319 Offset, /*IsPrefetch=*/true)) {
3320 // Insert an ADD instruction with the materialized Hi52 bits.
3321 Base = SDValue(
3322 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3323 0);
3324 return true;
3325 }
3326 }
3327
3328 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3329 /*IsPrefetch=*/true))
3330 return true;
3331
3332 Base = Addr;
3333 Offset = CurDAG->getTargetConstant(0, DL, VT);
3334 return true;
3335 }
3336
3337/// Return true if this a load/store that we have a RegRegScale instruction for.
// (Continuation; original defining line 3338 is missing from this
// extraction.) Requires a vendor extension providing indexed memory ops:
// XTHeadMemIdx/Xqcisls for scalar integers, XTHeadFMemIdx for f32/f64; and
// Add must be the address, never the stored value.
3339 const RISCVSubtarget &Subtarget) {
3340 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3341 return false;
3342 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3343 if (!(VT.isScalarInteger() &&
3344 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3345 !((VT == MVT::f32 || VT == MVT::f64) &&
3346 Subtarget.hasVendorXTHeadFMemIdx()))
3347 return false;
3348 // Don't allow stores of the value. It must be used as the address.
3349 if (User->getOpcode() == ISD::STORE &&
3350 cast<StoreSDNode>(User)->getValue() == Add)
3351 return false;
3352
3353 return true;
3354 }
3355
3356/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3357/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3358/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3359/// single addi and we don't have a SHXADD instruction we could use.
3360/// FIXME: May still need to check how many and what kind of users the SHL has.
// NOTE(review): original lines 3361 (the defining line) and 3371 (part of the
// ADDI-user condition, presumably an isa<ConstantSDNode> check on the ADD's
// second operand) are missing from this extraction.
3362 SDValue Add,
3363 SDValue Shift = SDValue()) {
3364 bool FoundADDI = false;
3365 for (auto *User : Add->users()) {
3366 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3367 continue;
3368
3369 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3370 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3372 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3373 return false;
3374
3375 FoundADDI = true;
3376
3377 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3378 assert(Shift.getOpcode() == ISD::SHL);
3379 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3380 if (Subtarget.hasShlAdd(ShiftAmt))
3381 return false;
3382
3383 // All users of the ADDI should be load/store.
3384 for (auto *ADDIUser : User->users())
3385 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3386 return false;
3387 }
3388
3389 return true;
3390 }
3391
// RISCVDAGToDAGISel::SelectAddrRegRegScale (continuation; original defining
// line 3392 is missing from this extraction).
// Matches Addr as Base + (Index << Scale) for indexed loads/stores, with the
// shift amount limited to [0, MaxShiftAmount]. Also reassociates
// (add (add (shl A C2) B) C1) into (add (addi B C1) (shl A C2)) when
// profitable. Falls back to Base + Index with Scale 0.
3393 unsigned MaxShiftAmount,
3394 SDValue &Base, SDValue &Index,
3395 SDValue &Scale) {
3396 if (Addr.getOpcode() != ISD::ADD)
3397 return false;
3398 SDValue LHS = Addr.getOperand(0);
3399 SDValue RHS = Addr.getOperand(1);
3400
3401 EVT VT = Addr.getSimpleValueType();
// Helper: match (shl X, C) with C in range and produce Index/Scale operands.
3402 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3403 SDValue &Shift) {
3404 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3405 return false;
3406
3407 // Only match shifts by a value in range [0, MaxShiftAmount].
3408 unsigned ShiftAmt = N.getConstantOperandVal(1);
3409 if (ShiftAmt > MaxShiftAmount)
3410 return false;
3411
3412 Index = N.getOperand(0);
3413 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3414 return true;
3415 };
3416
3417 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3418 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3419 if (LHS.getOpcode() == ISD::ADD &&
3420 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3421 isInt<12>(C1->getSExtValue())) {
3422 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3423 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3424 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3425 SDLoc(Addr), VT);
3426 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3427 LHS.getOperand(0), C1Val),
3428 0);
3429 return true;
3430 }
3431
3432 // Add is commutative so we need to check both operands.
3433 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3434 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3435 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3436 SDLoc(Addr), VT);
3437 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3438 LHS.getOperand(1), C1Val),
3439 0);
3440 return true;
3441 }
3442 }
3443
3444 // Don't match add with constants.
3445 // FIXME: Is this profitable for large constants that have 0s in the lower
3446 // 12 bits that we can materialize with LUI?
3447 return false;
3448 }
3449
3450 // Try to match a shift on the RHS.
3451 if (SelectShl(RHS, Index, Scale)) {
3452 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3453 return false;
3454 Base = LHS;
3455 return true;
3456 }
3457
3458 // Try to match a shift on the LHS.
3459 if (SelectShl(LHS, Index, Scale)) {
3460 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3461 return false;
3462 Base = RHS;
3463 return true;
3464 }
3465
// No shift matched: plain reg+reg with a zero scale, if still profitable.
3466 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3467 return false;
3468
3469 Base = LHS;
3470 Index = RHS;
3471 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3472 return true;
3473 }
3474
// RISCVDAGToDAGISel::SelectAddrRegZextRegScale (continuation; original
// defining line 3475 is missing from this extraction).
// Like SelectAddrRegRegScale, but additionally requires the Index to be an
// AND with a low mask of exactly `Bits` ones, which is then peeled off
// (the instruction form zero-extends the index itself).
3476 unsigned MaxShiftAmount,
3477 unsigned Bits, SDValue &Base,
3478 SDValue &Index,
3479 SDValue &Scale) {
3480 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3481 return false;
3482
3483 if (Index.getOpcode() == ISD::AND) {
3484 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3485 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3486 Index = Index.getOperand(0);
3487 return true;
3488 }
3489 }
3490
3491 return false;
3492 }
3493
// RISCVDAGToDAGISel::SelectAddrRegReg (continuation; original defining line
// 3494 is missing from this extraction).
// Matches Addr as a plain (add Base, Offset) of two registers; constant
// second operands are rejected so reg+imm addressing can handle them.
3495 SDValue &Offset) {
3496 if (Addr.getOpcode() != ISD::ADD)
3497 return false;
3498
3499 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3500 return false;
3501
3502 Base = Addr.getOperand(0);
3503 Offset = Addr.getOperand(1);
3504 return true;
3505 }
3506
// RISCVDAGToDAGISel::selectShiftMask (continuation; original defining line
// 3507 is missing from this extraction).
// Simplifies a shift-amount operand N for a shift of width ShiftWidth
// (a power of 2): peeks through zext, drops redundant AND masks, and folds
// ADD/SUB-of-constant forms into the bare amount, a NEG, or a NOT where the
// constant is congruent to 0 (or -1) mod ShiftWidth. Always returns true
// with ShAmt set to the (possibly simplified) amount.
3508 SDValue &ShAmt) {
3509 ShAmt = N;
3510
3511 // Peek through zext.
3512 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3513 ShAmt = ShAmt.getOperand(0);
3514
3515 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3516 // amount. If there is an AND on the shift amount, we can bypass it if it
3517 // doesn't affect any of those bits.
3518 if (ShAmt.getOpcode() == ISD::AND &&
3519 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3520 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3521
3522 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3523 // mask that covers the bits needed to represent all shift amounts.
3524 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3525 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3526
3527 if (ShMask.isSubsetOf(AndMask)) {
3528 ShAmt = ShAmt.getOperand(0);
3529 } else {
3530 // SimplifyDemandedBits may have optimized the mask so try restoring any
3531 // bits that are known zero.
3532 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
// If the AND really clears demanded bits, keep it and match as-is.
3533 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3534 return true;
3535 ShAmt = ShAmt.getOperand(0);
3536 }
3537 }
3538
3539 if (ShAmt.getOpcode() == ISD::ADD &&
3540 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3541 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3542 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3543 // to avoid the ADD.
3544 if (Imm != 0 && Imm % ShiftWidth == 0) {
3545 ShAmt = ShAmt.getOperand(0);
3546 return true;
3547 }
3548 } else if (ShAmt.getOpcode() == ISD::SUB &&
3549 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3550 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3551 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3552 // generate a NEG instead of a SUB of a constant.
3553 if (Imm != 0 && Imm % ShiftWidth == 0) {
3554 SDLoc DL(ShAmt);
3555 EVT VT = ShAmt.getValueType();
3556 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
// SUBW on i64 keeps the low 32 bits correct; only those bits are read.
3557 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3558 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3559 ShAmt.getOperand(1));
3560 ShAmt = SDValue(Neg, 0);
3561 return true;
3562 }
3563 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3564 // to generate a NOT instead of a SUB of a constant.
3565 if (Imm % ShiftWidth == ShiftWidth - 1) {
3566 SDLoc DL(ShAmt);
3567 EVT VT = ShAmt.getValueType();
3568 MachineSDNode *Not = CurDAG->getMachineNode(
3569 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3570 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3571 ShAmt = SDValue(Not, 0);
3572 return true;
3573 }
3574 }
3575
3576 return true;
3577 }
3578
3579/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3580/// check for equality with 0. This function emits instructions that convert the
3581/// seteq/setne into something that can be compared with 0.
3582/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3583/// ISD::SETNE).
// (Continuation; original defining line 3584 is missing from this
// extraction.) On success Val holds a value that is zero iff LHS == RHS,
// built with XORI / ADDI(-W) / BINVI / QC_E_ADDI / XOR as available.
3585 SDValue &Val) {
3586 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3587 "Unexpected condition code!");
3588
3589 // We're looking for a setcc.
3590 if (N->getOpcode() != ISD::SETCC)
3591 return false;
3592
3593 // Must be an equality comparison.
3594 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3595 if (CCVal != ExpectedCCVal)
3596 return false;
3597
3598 SDValue LHS = N->getOperand(0);
3599 SDValue RHS = N->getOperand(1);
3600
3601 if (!LHS.getValueType().isScalarInteger())
3602 return false;
3603
3604 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
3605 if (isNullConstant(RHS)) {
3606 Val = LHS;
3607 return true;
3608 }
3609
3610 SDLoc DL(N);
3611
3612 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3613 int64_t CVal = C->getSExtValue();
3614 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3615 // non-zero otherwise.
// -2048 is special: its negation (2048) is not a simm12, so the ADDI form
// below cannot be used, but XORI with -2048 has the same zero-iff-equal
// property.
3616 if (CVal == -2048) {
3617 Val = SDValue(
3618 CurDAG->getMachineNode(
3619 RISCV::XORI, DL, N->getValueType(0), LHS,
3620 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3621 0);
3622 return true;
3623 }
3624 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3625 // if the LHS is equal to the RHS and non-zero otherwise.
3626 if (isInt<12>(CVal) || CVal == 2048) {
3627 unsigned Opc = RISCV::ADDI;
// Fold away a sign_extend_inreg from i32 by using ADDIW instead.
3628 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3629 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3630 Opc = RISCV::ADDIW;
3631 LHS = LHS.getOperand(0);
3632 }
3633
3634 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3635 CurDAG->getSignedTargetConstant(
3636 -CVal, DL, N->getValueType(0))),
3637 0);
3638 return true;
3639 }
// Power-of-two RHS with Zbs: flipping that single bit yields zero iff equal.
3640 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3641 Val = SDValue(
3642 CurDAG->getMachineNode(
3643 RISCV::BINVI, DL, N->getValueType(0), LHS,
3644 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3645 0);
3646 return true;
3647 }
3648 // Same as the addi case above but for larger immediates (signed 26-bit) use
3649 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3650 // anything which can be done with a single lui as it might be compressible.
3651 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3652 (CVal & 0xFFF) != 0) {
3653 Val = SDValue(
3654 CurDAG->getMachineNode(
3655 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3656 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3657 0);
3658 return true;
3659 }
3660 }
3661
3662 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3663 // equal and a non-zero value if they aren't.
3664 Val = SDValue(
3665 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3666 return true;
3667 }
3668
// RISCVDAGToDAGISel::selectSExtBits (continuation; original defining line
// 3669 is missing from this extraction).
// Matches a value already sign-extended from `Bits` bits: either an explicit
// sign_extend_inreg of that width, or a value with enough known sign bits —
// in which case a matching shl+sra pair is peeled off to expose the source.
3670 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3671 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3672 Val = N.getOperand(0);
3673 return true;
3674 }
3675
// Helper: strip (sra (shl X, C), C) when C equals the extension shift.
3676 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3677 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3678 return N;
3679
3680 SDValue N0 = N.getOperand(0);
3681 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3682 N.getConstantOperandVal(1) == ShiftAmt &&
3683 N0.getConstantOperandVal(1) == ShiftAmt)
3684 return N0.getOperand(0);
3685
3686 return N;
3687 };
3688
3689 MVT VT = N.getSimpleValueType();
3690 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3691 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3692 return true;
3693 }
3694
3695 return false;
3696 }
3697
// RISCVDAGToDAGISel::selectZExtBits (continuation; original defining line
// 3698 is missing from this extraction).
// Matches a value already zero-extended from `Bits` bits: either an AND with
// the exact low mask (which is stripped), or a value whose upper bits are
// provably zero (returned unchanged).
3699 if (N.getOpcode() == ISD::AND) {
3700 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3701 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3702 Val = N.getOperand(0);
3703 return true;
3704 }
3705 }
3706 MVT VT = N.getSimpleValueType();
3707 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3708 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3709 Val = N;
3710 return true;
3711 }
3712
3713 return false;
3714 }
3715
3716/// Look for various patterns that can be done with a SHL that can be folded
3717/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3718/// SHXADD we are trying to match.
// NOTE(review): the first half of the signature (orig. 3719, presumably
// `bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,`) was
// dropped by extraction -- confirm against upstream.
3720 SDValue &Val) {
3721 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3722 SDValue N0 = N.getOperand(0);
3723
3724 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3725 (LeftShift || N0.getOpcode() == ISD::SRL) &&
// NOTE(review): a condition line (orig. 3726, presumably
// `isa<ConstantSDNode>(N0.getOperand(1))) {`) was dropped here.
3727 uint64_t Mask = N.getConstantOperandVal(1);
3728 unsigned C2 = N0.getConstantOperandVal(1);
3729
// Clear the mask bits that the inner shift already forces to zero, so the
// shifted-mask analysis below sees only the meaningful bits.
3730 unsigned XLen = Subtarget->getXLen();
3731 if (LeftShift)
3732 Mask &= maskTrailingZeros<uint64_t>(C2);
3733 else
3734 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3735
3736 if (isShiftedMask_64(Mask)) {
3737 unsigned Leading = XLen - llvm::bit_width(Mask);
3738 unsigned Trailing = llvm::countr_zero(Mask);
// The trailing-zero count must equal the SHXADD shift amount we target.
3739 if (Trailing != ShAmt)
3740 return false;
3741
3742 unsigned Opcode;
3743 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3744 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3745 // followed by a SHXADD with c3 for the X amount.
3746 if (LeftShift && Leading == 0 && C2 < Trailing)
3747 Opcode = RISCV::SRLI;
3748 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3749 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3750 // followed by a SHXADD with c3 for the X amount.
3751 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3752 Opcode = RISCV::SRLIW;
3753 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3754 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3755 // followed by a SHXADD using c3 for the X amount.
3756 else if (!LeftShift && Leading == C2)
3757 Opcode = RISCV::SRLI;
3758 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3759 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3760 // followed by a SHXADD using c3 for the X amount.
3761 else if (!LeftShift && Leading == 32 + C2)
3762 Opcode = RISCV::SRLIW;
3763 else
3764 return false;
3765
3766 SDLoc DL(N);
3767 EVT VT = N.getValueType();
// ShAmt is reused here as the computed right-shift amount for the new node.
3768 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3769 Val = SDValue(
3770 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
3771 CurDAG->getTargetConstant(ShAmt, DL, VT)),
3772 0);
3773 return true;
3774 }
3775 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
// NOTE(review): a condition line (orig. 3776, presumably
// `isa<ConstantSDNode>(N0.getOperand(1))) {`) was dropped here.
3777 uint64_t Mask = N.getConstantOperandVal(1);
3778 unsigned C2 = N0.getConstantOperandVal(1);
3779
3780 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3781 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3782 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3783 // the X amount.
3784 if (isShiftedMask_64(Mask)) {
3785 unsigned XLen = Subtarget->getXLen();
3786 unsigned Leading = XLen - llvm::bit_width(Mask);
3787 unsigned Trailing = llvm::countr_zero(Mask);
3788 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3789 SDLoc DL(N);
3790 EVT VT = N.getValueType();
3791 Val = SDValue(CurDAG->getMachineNode(
3792 RISCV::SRAI, DL, VT, N0.getOperand(0),
3793 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3794 0);
3795 Val = SDValue(CurDAG->getMachineNode(
3796 RISCV::SRLI, DL, VT, Val,
3797 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3798 0);
3799 return true;
3800 }
3801 }
3802 }
3803 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3804 (LeftShift || N.getOpcode() == ISD::SRL) &&
3805 isa<ConstantSDNode>(N.getOperand(1))) {
3806 SDValue N0 = N.getOperand(0);
3807 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
// NOTE(review): a condition line (orig. 3808, presumably
// `isa<ConstantSDNode>(N0.getOperand(1))) {`) was dropped here.
3809 uint64_t Mask = N0.getConstantOperandVal(1);
3810 if (isShiftedMask_64(Mask)) {
3811 unsigned C1 = N.getConstantOperandVal(1);
3812 unsigned XLen = Subtarget->getXLen();
3813 unsigned Leading = XLen - llvm::bit_width(Mask);
3814 unsigned Trailing = llvm::countr_zero(Mask);
3815 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3816 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3817 if (LeftShift && Leading == 32 && Trailing > 0 &&
3818 (Trailing + C1) == ShAmt) {
3819 SDLoc DL(N);
3820 EVT VT = N.getValueType();
3821 Val = SDValue(CurDAG->getMachineNode(
3822 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3823 CurDAG->getTargetConstant(Trailing, DL, VT)),
3824 0);
3825 return true;
3826 }
3827 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3828 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3829 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3830 (Trailing - C1) == ShAmt) {
3831 SDLoc DL(N);
3832 EVT VT = N.getValueType();
3833 Val = SDValue(CurDAG->getMachineNode(
3834 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3835 CurDAG->getTargetConstant(Trailing, DL, VT)),
3836 0);
3837 return true;
3838 }
3839 }
3840 }
3841 }
3842
3843 return false;
3844}
3845
3846/// Look for various patterns that can be done with a SHL that can be folded
3847/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3848/// SHXADD_UW we are trying to match.
// NOTE(review): the first half of the signature (orig. 3849, presumably
// `bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,`)
// was dropped by extraction -- confirm against upstream.
3850 SDValue &Val) {
3851 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3852 N.hasOneUse()) {
3853 SDValue N0 = N.getOperand(0);
3854 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3855 N0.hasOneUse()) {
3856 uint64_t Mask = N.getConstantOperandVal(1);
3857 unsigned C2 = N0.getConstantOperandVal(1);
3858
// Bits already zeroed by the inner left shift are irrelevant to the mask.
3859 Mask &= maskTrailingZeros<uint64_t>(C2);
3860
3861 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3862 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3863 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3864 if (isShiftedMask_64(Mask)) {
3865 unsigned Leading = llvm::countl_zero(Mask);
3866 unsigned Trailing = llvm::countr_zero(Mask);
3867 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3868 SDLoc DL(N);
3869 EVT VT = N.getValueType();
3870 Val = SDValue(CurDAG->getMachineNode(
3871 RISCV::SLLI, DL, VT, N0.getOperand(0),
3872 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3873 0);
3874 return true;
3875 }
3876 }
3877 }
3878 }
3879
3880 return false;
3881}
3882
// NOTE(review): the signature line (orig. 3883) was dropped by extraction.
// Presumably `bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const` --
// confirm against upstream.
// Returns true when the OR's operands share no set bits, i.e. the OR is
// equivalent to an ADD.
3884 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
// Fast path: the disjoint flag was already proven by an earlier combine.
3885 if (N->getFlags().hasDisjoint())
3886 return true;
3887 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
3888}
3889
3890bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3891 SDValue N, SDValue &Val) {
3892 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
3893 /*CompressionCost=*/true);
3894 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3895 /*CompressionCost=*/true);
3896 if (OrigCost <= Cost)
3897 return false;
3898
3899 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
3900 return true;
3901}
3902
// NOTE(review): the signature line (orig. 3903) was dropped by extraction.
// From the body this looks like the selector for a constant with bit 31 set
// whose users tolerate replacing it by its sign-extended form (presumably
// `selectZExtImm32`) -- confirm the name against upstream.
3904 if (!isa<ConstantSDNode>(N))
3905 return false;
3906 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
// Only constants whose bits [63:31] are exactly 0...01, i.e. a positive
// 32-bit value with bit 31 set, are interesting here.
3907 if ((Imm >> 31) != 1)
3908 return false;
3909
// Every user must be an ADD, or an OR proven disjoint (ADD-equivalent), so
// substituting the sign-extended constant is compensated elsewhere.
3910 for (const SDNode *U : N->users()) {
3911 switch (U->getOpcode()) {
3912 case ISD::ADD:
3913 break;
3914 case ISD::OR:
3915 if (orDisjoint(U))
3916 break;
3917 return false;
3918 default:
3919 return false;
3920 }
3921 }
3922
// Try the sign-extended 32-bit form, which may need fewer instructions.
3923 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
3924}
3925
// NOTE(review): the signature line (orig. 3926) was dropped by extraction.
// This is the body of `selectNegImm` (referenced later in this file):
// select the negated constant when it is cheaper to materialize and all
// users are additions that can absorb the negation.
3927 if (!isa<ConstantSDNode>(N))
3928 return false;
3929 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
// 32-bit constants are already cheap; only large constants qualify.
3930 if (isInt<32>(Imm))
3931 return false;
3932
3933 for (const SDNode *U : N->users()) {
3934 switch (U->getOpcode()) {
3935 case ISD::ADD:
3936 break;
3937 case RISCVISD::VMV_V_X_VL:
// A splat user is fine only if every splat consumer is itself an add.
3938 if (!all_of(U->users(), [](const SDNode *V) {
3939 return V->getOpcode() == ISD::ADD ||
3940 V->getOpcode() == RISCVISD::ADD_VL;
3941 }))
3942 return false;
3943 break;
3944 default:
3945 return false;
3946 }
3947 }
3948
3949 return selectImm64IfCheaper(-Imm, Imm, N, Val);
3950}
3951
// NOTE(review): the signature line (orig. 3952) was dropped by extraction.
// From the body this selects the bitwise-inverted constant (~Imm) when all
// users are logic ops that have inverted-operand forms (ANDN/ORN/XNOR or
// vector equivalents) -- presumably `selectInvLogicImm`; confirm upstream.
3953 if (!isa<ConstantSDNode>(N))
3954 return false;
3955 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3956
3957 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3958 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3959 return false;
3960
3961 // Abandon this transform if the constant is needed elsewhere.
3962 for (const SDNode *U : N->users()) {
3963 switch (U->getOpcode()) {
3964 case ISD::AND:
3965 case ISD::OR:
3966 case ISD::XOR:
// Scalar inverted-operand logic ops require Zbb or Zbkb.
3967 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3968 return false;
3969 break;
3970 case RISCVISD::VMV_V_X_VL:
// Vector VANDN requires Zvkb; every splat consumer must be an AND.
3971 if (!Subtarget->hasStdExtZvkb())
3972 return false;
3973 if (!all_of(U->users(), [](const SDNode *V) {
3974 return V->getOpcode() == ISD::AND ||
3975 V->getOpcode() == RISCVISD::AND_VL;
3976 }))
3977 return false;
3978 break;
3979 default:
3980 return false;
3981 }
3982 }
3983
// For 32-bit constants the inverted form is known profitable (LUI only).
3984 if (isInt<32>(Imm)) {
3985 Val =
3986 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3987 return true;
3988 }
3989
3990 // For 64-bit constants, the instruction sequences get complex,
3991 // so we select inverted only if it's cheaper.
3992 return selectImm64IfCheaper(~Imm, Imm, N, Val);
3993}
3994
3995static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3996 unsigned Bits,
3997 const TargetInstrInfo *TII) {
3998 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3999
4000 if (!MCOpcode)
4001 return false;
4002
4003 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
4004 const uint64_t TSFlags = MCID.TSFlags;
4005 if (!RISCVII::hasSEWOp(TSFlags))
4006 return false;
4007 assert(RISCVII::hasVLOp(TSFlags));
4008
4009 unsigned ChainOpIdx = User->getNumOperands() - 1;
4010 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
4011 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4012 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4013 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4014
4015 if (UserOpNo == VLIdx)
4016 return false;
4017
4018 auto NumDemandedBits =
4019 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4020 return NumDemandedBits && Bits >= *NumDemandedBits;
4021}
4022
4023// Return true if all users of this SDNode* only consume the lower \p Bits.
4024// This can be used to form W instructions for add/sub/mul/shl even when the
4025// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4026// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4027// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4028// the add/sub/mul/shl to become non-W instructions. By checking the users we
4029// may be able to use a W instruction and CSE with the other instruction if
4030// this has happened. We could try to detect that the CSE opportunity exists
4031// before doing this, but that would be more complicated.
// NOTE(review): the first half of the signature (orig. 4032, presumably
// `bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,`)
// was dropped by extraction -- confirm against upstream.
4033 const unsigned Depth) const {
4034 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4035 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4036 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4037 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4038 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4039 isa<ConstantSDNode>(Node) || Depth != 0) &&
4040 "Unexpected opcode");
4041
// NOTE(review): a guard line (orig. 4042, presumably a recursion-depth
// check such as `if (Depth >= SelectionDAG::MaxRecursionDepth)`) was
// dropped here -- confirm against upstream.
4043 return false;
4044
4045 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4046 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4047 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4048 return false;
4049
4050 for (SDUse &Use : Node->uses()) {
4051 SDNode *User = Use.getUser();
4052 // Users of this node should have already been instruction selected
4053 if (!User->isMachineOpcode())
4054 return false;
4055
4056 // TODO: Add more opcodes?
4057 switch (User->getMachineOpcode()) {
4058 default:
// NOTE(review): a line (orig. 4059, presumably a call to
// `vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII)`)
// was dropped here -- confirm against upstream.
4060 break;
4061 return false;
// These W-form and FP-convert-from-32-bit users only read the low 32 bits.
4062 case RISCV::ADDW:
4063 case RISCV::ADDIW:
4064 case RISCV::SUBW:
4065 case RISCV::MULW:
4066 case RISCV::SLLW:
4067 case RISCV::SLLIW:
4068 case RISCV::SRAW:
4069 case RISCV::SRAIW:
4070 case RISCV::SRLW:
4071 case RISCV::SRLIW:
4072 case RISCV::DIVW:
4073 case RISCV::DIVUW:
4074 case RISCV::REMW:
4075 case RISCV::REMUW:
4076 case RISCV::ROLW:
4077 case RISCV::RORW:
4078 case RISCV::RORIW:
4079 case RISCV::CLZW:
4080 case RISCV::CTZW:
4081 case RISCV::CPOPW:
4082 case RISCV::SLLI_UW:
4083 case RISCV::ABSW:
4084 case RISCV::FMV_W_X:
4085 case RISCV::FCVT_H_W:
4086 case RISCV::FCVT_H_W_INX:
4087 case RISCV::FCVT_H_WU:
4088 case RISCV::FCVT_H_WU_INX:
4089 case RISCV::FCVT_S_W:
4090 case RISCV::FCVT_S_W_INX:
4091 case RISCV::FCVT_S_WU:
4092 case RISCV::FCVT_S_WU_INX:
4093 case RISCV::FCVT_D_W:
4094 case RISCV::FCVT_D_W_INX:
4095 case RISCV::FCVT_D_WU:
4096 case RISCV::FCVT_D_WU_INX:
4097 case RISCV::TH_REVW:
4098 case RISCV::TH_SRRIW:
4099 if (Bits >= 32)
4100 break;
4101 return false;
4102 case RISCV::SLL:
4103 case RISCV::SRA:
4104 case RISCV::SRL:
4105 case RISCV::ROL:
4106 case RISCV::ROR:
4107 case RISCV::BSET:
4108 case RISCV::BCLR:
4109 case RISCV::BINV:
4110 // Shift amount operands only use log2(Xlen) bits.
4111 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4112 break;
4113 return false;
4114 case RISCV::SLLI:
4115 // SLLI only uses the lower (XLen - ShAmt) bits.
4116 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4117 break;
4118 return false;
4119 case RISCV::ANDI:
// ANDI demands only bits up to the mask's highest set bit; otherwise fall
// through to the recursive user check below.
4120 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4121 break;
4122 goto RecCheck;
4123 case RISCV::ORI: {
// ORI's set bits are forced to one, so only bits above the highest clear
// bit of ~Imm are demanded from the input.
4124 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4125 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4126 break;
4127 [[fallthrough]];
4128 }
// For pure bit-parallel ops, the demanded bits are whatever the users of
// the user demand -- recurse one level deeper.
4129 case RISCV::AND:
4130 case RISCV::OR:
4131 case RISCV::XOR:
4132 case RISCV::XORI:
4133 case RISCV::ANDN:
4134 case RISCV::ORN:
4135 case RISCV::XNOR:
4136 case RISCV::SH1ADD:
4137 case RISCV::SH2ADD:
4138 case RISCV::SH3ADD:
4139 RecCheck:
4140 if (hasAllNBitUsers(User, Bits, Depth + 1))
4141 break;
4142 return false;
4143 case RISCV::SRLI: {
4144 unsigned ShAmt = User->getConstantOperandVal(1);
4145 // If we are shifting right by less than Bits, and users don't demand any
4146 // bits that were shifted into [Bits-1:0], then we can consider this as an
4147 // N-Bit user.
4148 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4149 break;
4150 return false;
4151 }
4152 case RISCV::SEXT_B:
4153 case RISCV::PACKH:
4154 if (Bits >= 8)
4155 break;
4156 return false;
4157 case RISCV::SEXT_H:
4158 case RISCV::FMV_H_X:
4159 case RISCV::ZEXT_H_RV32:
4160 case RISCV::ZEXT_H_RV64:
4161 case RISCV::PACKW:
4162 if (Bits >= 16)
4163 break;
4164 return false;
4165 case RISCV::PACK:
4166 if (Bits >= (Subtarget->getXLen() / 2))
4167 break;
4168 return false;
4169 case RISCV::ADD_UW:
4170 case RISCV::SH1ADD_UW:
4171 case RISCV::SH2ADD_UW:
4172 case RISCV::SH3ADD_UW:
4173 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4174 // 32 bits.
4175 if (Use.getOperandNo() == 0 && Bits >= 32)
4176 break;
4177 return false;
// Narrow stores only read the stored value's low 8/16/32 bits.
4178 case RISCV::SB:
4179 if (Use.getOperandNo() == 0 && Bits >= 8)
4180 break;
4181 return false;
4182 case RISCV::SH:
4183 if (Use.getOperandNo() == 0 && Bits >= 16)
4184 break;
4185 return false;
4186 case RISCV::SW:
4187 if (Use.getOperandNo() == 0 && Bits >= 32)
4188 break;
4189 return false;
4190 case RISCV::TH_EXT:
4191 case RISCV::TH_EXTU: {
4192 unsigned Msb = User->getConstantOperandVal(1);
4193 unsigned Lsb = User->getConstantOperandVal(2);
4194 // Behavior of Msb < Lsb is not well documented.
4195 if (Msb >= Lsb && Bits > Msb)
4196 break;
4197 return false;
4198 }
4199 }
4200 }
4201
4202 return true;
4203}
4204
4205// Select a constant that can be represented as (sign_extend(imm5) << imm2).
// NOTE(review): the first half of the signature (orig. 4206, presumably
// `bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,`)
// was dropped by extraction -- confirm against upstream.
4207 SDValue &Shl2) {
4208 auto *C = dyn_cast<ConstantSDNode>(N);
4209 if (!C)
4210 return false;
4211
// Try the smallest shift first so the simm5 uses the fewest shifted bits.
4212 int64_t Offset = C->getSExtValue();
4213 for (unsigned Shift = 0; Shift < 4; Shift++) {
// The shifted-out low bits must be zero for the decomposition to be exact.
4214 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4215 EVT VT = N->getValueType(0);
4216 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4217 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4218 return true;
4219 }
4220 }
4221
4222 return false;
4223}
4224
4225// Select VL as a 5 bit immediate or a value that will become a register. This
4226// allows us to choose between VSETIVLI or VSETVLI later.
// NOTE(review): the signature line (orig. 4227, presumably
// `bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {`) was
// dropped by extraction -- confirm against upstream.
4228 auto *C = dyn_cast<ConstantSDNode>(N);
4229 if (C && isUInt<5>(C->getZExtValue())) {
4230 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4231 N->getValueType(0));
4232 } else if (C && C->isAllOnes()) {
4233 // Treat all ones as VLMax.
4234 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4235 N->getValueType(0));
4236 } else if (isa<RegisterSDNode>(N) &&
4237 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4238 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4239 // as the register class. Convert X0 to a special immediate to pass the
4240 // MachineVerifier. This is recognized specially by the vsetvli insertion
4241 // pass.
4242 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4243 N->getValueType(0));
4244 } else {
4245 VL = N;
4246 }
4247
4248 return true;
4249}
4250
// NOTE(review): the signature line (orig. 4251) was dropped by extraction;
// per the file's declaration index this is
// `static SDValue findVSplat(SDValue N)`.
// Peels an undef-based INSERT_SUBVECTOR and returns the underlying
// VMV_V_X_VL / VMV_S_X_VL splat with an undef passthru, or SDValue().
4252 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4253 if (!N.getOperand(0).isUndef())
4254 return SDValue();
4255 N = N.getOperand(1);
4256 }
4257 SDValue Splat = N;
4258 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4259 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4260 !Splat.getOperand(0).isUndef())
4261 return SDValue();
4262 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4263 return Splat;
4264}
4265
// NOTE(review): two lines (orig. 4266-4267) were dropped by extraction:
// presumably the signature `bool RISCVDAGToDAGISel::selectVSplat(SDValue N,
// SDValue &SplatVal) {` and `SDValue Splat = findVSplat(N);` -- confirm
// against upstream. On success SplatVal is the splatted scalar operand.
4268 if (!Splat)
4269 return false;
4270
4271 SplatVal = Splat.getOperand(1);
4272 return true;
4273}
4274
// NOTE(review): the first signature line (orig. 4275) was dropped by
// extraction; per the file's declaration index the full signature is
// `static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
// SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
// std::function<bool(int64_t)> ValidateImm, bool Decrement = false)`.
4276 SelectionDAG &DAG,
4277 const RISCVSubtarget &Subtarget,
4278 std::function<bool(int64_t)> ValidateImm,
4279 bool Decrement = false) {
// NOTE(review): a line (orig. 4280, presumably
// `SDValue Splat = findVSplat(N);`) was dropped here.
4281 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4282 return false;
4283
4284 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4285 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4286 "Unexpected splat operand type");
4287
4288 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4289 // type is wider than the resulting vector element type: an implicit
4290 // truncation first takes place. Therefore, perform a manual
4291 // truncation/sign-extension in order to ignore any truncated bits and catch
4292 // any zero-extended immediate.
4293 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4294 // sign-extending to (XLenVT -1).
4295 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4296
4297 int64_t SplatImm = SplatConst.getSExtValue();
4298
4299 if (!ValidateImm(SplatImm))
4300 return false;
4301
// Callers matching "imm+1" patterns validate the original value but emit
// the decremented one.
4302 if (Decrement)
4303 SplatImm -= 1;
4304
4305 SplatVal =
4306 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4307 return true;
4308}
4309
// NOTE(review): signature line (orig. 4310) dropped by extraction; this is
// presumably `selectVSplatSimm5` -- matches a splat of a 5-bit signed imm.
4311 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4312 [](int64_t Imm) { return isInt<5>(Imm); });
4313}
4314
// NOTE(review): signature line (orig. 4315) dropped by extraction; this is
// presumably `selectVSplatSimm5Plus1` -- matches simm5+1 (range (-15,16])
// and emits the decremented immediate.
4316 return selectVSplatImmHelper(
4317 N, SplatVal, *CurDAG, *Subtarget,
4318 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4319 /*Decrement=*/true);
4320}
4321
// NOTE(review): signature line (orig. 4322) dropped by extraction; same
// immediate range as the Plus1 variant above but WITHOUT decrementing --
// presumably `selectVSplatSimm5Plus1NoDec`; confirm name against upstream.
4323 return selectVSplatImmHelper(
4324 N, SplatVal, *CurDAG, *Subtarget,
4325 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4326 /*Decrement=*/false);
4327}
4328
// NOTE(review): the first signature line (orig. 4329) dropped by extraction;
// presumably `selectVSplatSimm5Plus1NonZero` -- as Plus1 but excluding zero.
4330 SDValue &SplatVal) {
4331 return selectVSplatImmHelper(
4332 N, SplatVal, *CurDAG, *Subtarget,
4333 [](int64_t Imm) {
4334 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
4335 },
4336 /*Decrement=*/true);
4337}
4338
// NOTE(review): the first signature line (orig. 4339) dropped by extraction;
// presumably `bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned
// Bits, SDValue &SplatVal)` -- matches a splat of a Bits-wide unsigned imm.
4340 SDValue &SplatVal) {
4341 return selectVSplatImmHelper(
4342 N, SplatVal, *CurDAG, *Subtarget,
4343 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4344}
4345
// NOTE(review): lines dropped by extraction before this point: presumably
// the signature (e.g. `selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)`)
// and `SDValue Splat = findVSplat(N);` -- confirm against upstream.
// Matches a splat whose scalar is a constant selectNegImm can negate.
4347 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4349}
4350
// NOTE(review): signature line (orig. 4351) dropped by extraction;
// presumably `selectLow8BitsVSplat` -- looks through extends/truncates
// (which preserve the low 8 bits) to find an underlying splat.
4352 auto IsExtOrTrunc = [](SDValue N) {
4353 switch (N->getOpcode()) {
4354 case ISD::SIGN_EXTEND:
4355 case ISD::ZERO_EXTEND:
4356 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4357 // inactive elements will be undef.
4358 case RISCVISD::TRUNCATE_VECTOR_VL:
4359 case RISCVISD::VSEXT_VL:
4360 case RISCVISD::VZEXT_VL:
4361 return true;
4362 default:
4363 return false;
4364 }
4365 };
4366
4367 // We can have multiple nested nodes, so unravel them all if needed.
4368 while (IsExtOrTrunc(N)) {
// Stop if the node is shared or the element gets narrower than 8 bits.
4369 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4370 return false;
4371 N = N->getOperand(0);
4372 }
4373
4374 return selectVSplat(N, SplatVal);
4375}
4376
// NOTE(review): signature line (orig. 4377) dropped by extraction;
// presumably `bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm)`
// -- selects an FP value that can be produced from a scalar integer.
4378 // Allow bitcasts from XLenVT -> FP.
4379 if (N.getOpcode() == ISD::BITCAST &&
4380 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4381 Imm = N.getOperand(0);
4382 return true;
4383 }
4384 // Allow moves from XLenVT to FP.
4385 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4386 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4387 Imm = N.getOperand(0);
4388 return true;
4389 }
4390
4391 // Otherwise, look for FP constants that can materialized with scalar int.
// NOTE(review): a line (orig. 4392, presumably
// `ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());`)
// was dropped here -- confirm against upstream.
4393 if (!CFP)
4394 return false;
4395 const APFloat &APF = CFP->getValueAPF();
4396 // td can handle +0.0 already.
4397 if (APF.isPosZero())
4398 return false;
4399
4400 MVT VT = CFP->getSimpleValueType(0);
4401
4402 MVT XLenVT = Subtarget->getXLenVT();
// f64 bit patterns don't fit a single RV32 GPR; only -0.0 is expected here.
4403 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4404 assert(APF.isNegZero() && "Unexpected constant.");
4405 return false;
4406 }
4407 SDLoc DL(N);
4408 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4409 *Subtarget);
4410 return true;
4411}
4412
// NOTE(review): the first signature line (orig. 4413) dropped by extraction;
// presumably `bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned
// Width, SDValue &Imm)` -- selects a constant that is simm5 after being
// sign-extended from Width bits.
4414 SDValue &Imm) {
4415 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4416 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4417
4418 if (!isInt<5>(ImmVal))
4419 return false;
4420
4421 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4422 Subtarget->getXLenVT());
4423 return true;
4424 }
4425
4426 return false;
4427}
4428
4429// Try to remove sext.w if the input is a W instruction or can be made into
4430// a W instruction cheaply.
4431bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4432 // Look for the sext.w pattern, addiw rd, rs1, 0.
4433 if (N->getMachineOpcode() != RISCV::ADDIW ||
4434 !isNullConstant(N->getOperand(1)))
4435 return false;
4436
4437 SDValue N0 = N->getOperand(0);
4438 if (!N0.isMachineOpcode())
4439 return false;
4440
4441 switch (N0.getMachineOpcode()) {
4442 default:
4443 break;
4444 case RISCV::ADD:
4445 case RISCV::ADDI:
4446 case RISCV::SUB:
4447 case RISCV::MUL:
4448 case RISCV::SLLI: {
4449 // Convert sext.w+add/sub/mul to their W instructions. This will create
4450 // a new independent instruction. This improves latency.
4451 unsigned Opc;
4452 switch (N0.getMachineOpcode()) {
4453 default:
4454 llvm_unreachable("Unexpected opcode!");
4455 case RISCV::ADD: Opc = RISCV::ADDW; break;
4456 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4457 case RISCV::SUB: Opc = RISCV::SUBW; break;
4458 case RISCV::MUL: Opc = RISCV::MULW; break;
4459 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4460 }
4461
4462 SDValue N00 = N0.getOperand(0);
4463 SDValue N01 = N0.getOperand(1);
4464
4465 // Shift amount needs to be uimm5.
4466 if (N0.getMachineOpcode() == RISCV::SLLI &&
4467 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4468 break;
4469
4470 SDNode *Result =
4471 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4472 N00, N01);
4473 ReplaceUses(N, Result);
4474 return true;
4475 }
4476 case RISCV::ADDW:
4477 case RISCV::ADDIW:
4478 case RISCV::SUBW:
4479 case RISCV::MULW:
4480 case RISCV::SLLIW:
4481 case RISCV::PACKW:
4482 case RISCV::TH_MULAW:
4483 case RISCV::TH_MULAH:
4484 case RISCV::TH_MULSW:
4485 case RISCV::TH_MULSH:
4486 if (N0.getValueType() == MVT::i32)
4487 break;
4488
4489 // Result is already sign extended just remove the sext.w.
4490 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4491 ReplaceUses(N, N0.getNode());
4492 return true;
4493 }
4494
4495 return false;
4496}
4497
4498static bool usesAllOnesMask(SDValue MaskOp) {
4499 const auto IsVMSet = [](unsigned Opc) {
4500 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4501 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4502 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4503 Opc == RISCV::PseudoVMSET_M_B8;
4504 };
4505
4506 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4507 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4508 // assume that it's all-ones? Same applies to its VL.
4509 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4510}
4511
4512static bool isImplicitDef(SDValue V) {
4513 if (!V.isMachineOpcode())
4514 return false;
4515 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4516 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4517 if (!isImplicitDef(V.getOperand(I)))
4518 return false;
4519 return true;
4520 }
4521 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4522}
4523
4524// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4525// corresponding "unmasked" pseudo versions.
4526bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4527 const RISCV::RISCVMaskedPseudoInfo *I =
4528 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4529 if (!I)
4530 return false;
4531
4532 unsigned MaskOpIdx = I->MaskOpIdx;
4533 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4534 return false;
4535
4536 // There are two classes of pseudos in the table - compares and
4537 // everything else. See the comment on RISCVMaskedPseudo for details.
4538 const unsigned Opc = I->UnmaskedPseudo;
4539 const MCInstrDesc &MCID = TII->get(Opc);
4540 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4541
4542 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4543 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4544
4545 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
// NOTE(review): a continuation line (orig. 4546, presumably
// `!RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&`) was dropped here.
4547 "Unmasked pseudo has policy but masked pseudo doesn't?");
4548 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4549 "Unexpected pseudo structure");
4550 assert(!(HasPassthru && !MaskedHasPassthru) &&
4551 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4552
// NOTE(review): a declaration line (orig. 4553, presumably
// `SmallVector<SDValue, 8> Ops;`) was dropped here.
4554 // Skip the passthru operand at index 0 if the unmasked don't have one.
4555 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
// Drop the trailing policy operand when the unmasked form doesn't take one.
4556 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4557 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4558 bool HasChainOp =
4559 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4560 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4561 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4562 // Skip the mask
4563 SDValue Op = N->getOperand(I);
4564 if (I == MaskOpIdx)
4565 continue;
4566 if (DropPolicy && I == LastOpNum)
4567 continue;
4568 Ops.push_back(Op);
4569 }
4570
4571 MachineSDNode *Result =
4572 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4573
// Preserve memory operands and node flags on the replacement.
4574 if (!N->memoperands_empty())
4575 CurDAG->setNodeMemRefs(Result, N->memoperands());
4576
4577 Result->setFlags(N->getFlags());
4578 ReplaceUses(N, Result);
4579
4580 return true;
4581}
4582
4583/// If our passthru is an implicit_def, use noreg instead. This side
4584/// steps issues with MachineCSE not being able to CSE expressions with
4585/// IMPLICIT_DEF operands while preserving the semantic intent. See
4586/// pr64282 for context. Note that this transform is the last one
4587/// performed at ISEL DAG to DAG.
4588bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4589 bool MadeChange = false;
4590 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4591
// Walk the node list backwards so replacements don't disturb iteration.
4592 while (Position != CurDAG->allnodes_begin()) {
4593 SDNode *N = &*--Position;
4594 if (N->use_empty() || !N->isMachineOpcode())
4595 continue;
4596
4597 const unsigned Opc = N->getMachineOpcode();
4598 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
// NOTE(review): a condition line (orig. 4599, presumably
// `!RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||`) was dropped here.
4600 !isImplicitDef(N->getOperand(0)))
4601 continue;
4602
// NOTE(review): a declaration line (orig. 4603, presumably
// `SmallVector<SDValue, 8> Ops;`) was dropped here.
// Rebuild the operand list with NoRegister in place of the passthru.
4604 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4605 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4606 SDValue Op = N->getOperand(I);
4607 Ops.push_back(Op);
4608 }
4609
4610 MachineSDNode *Result =
4611 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4612 Result->setFlags(N->getFlags());
4613 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4614 ReplaceUses(N, Result);
4615 MadeChange = true;
4616 }
4617 return MadeChange;
4618}
4619
4620
4621// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4622// for instruction scheduling.
4624 CodeGenOptLevel OptLevel) {
4625 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4626}
4627
4629
4634
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this a load/store that we have a RegRegScale instruction for.
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
#define INST_ALL_NF_CASE(NAME)
static bool isApplicableToPLI(int Val)
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
bool isPosZero() const
Definition APFloat.h:1442
bool isNegZero() const
Definition APFloat.h:1443
Class for arbitrary precision integers.
Definition APInt.h:78
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:470
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
iterator_range< user_iterator > users()
Definition Value.h:426
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.