LLVM 17.0.0git
RISCVISelDAGToDAG.cpp
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
16#include "RISCVISelLowering.h"
19#include "llvm/IR/IntrinsicsRISCV.h"
21#include "llvm/Support/Debug.h"
24#include <optional>
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
31namespace llvm::RISCV {
32#define GET_RISCVVSSEGTable_IMPL
33#define GET_RISCVVLSEGTable_IMPL
34#define GET_RISCVVLXSEGTable_IMPL
35#define GET_RISCVVSXSEGTable_IMPL
36#define GET_RISCVVLETable_IMPL
37#define GET_RISCVVSETable_IMPL
38#define GET_RISCVVLXTable_IMPL
39#define GET_RISCVVSXTable_IMPL
40#define GET_RISCVMaskedPseudosTable_IMPL
41#include "RISCVGenSearchableTables.inc"
42} // namespace llvm::RISCV
43
44static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
45 assert(Node->getNumOperands() > 0 && "Node with no operands");
46 unsigned LastOpIdx = Node->getNumOperands() - 1;
47 if (Node->getOperand(LastOpIdx).getValueType() == MVT::Glue)
48 --LastOpIdx;
49 if (Node->getOperand(LastOpIdx).getValueType() == MVT::Other)
50 --LastOpIdx;
51 return LastOpIdx;
52}
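// Rough picture (illustrative) of the pseudo operand layout these helpers
// assume: passthru?, srcs..., mask?, VL, SEW, policy?, chain, glue? -- so
// stepping back over an optional glue and then a chain operand lands on the
// trailing policy operand.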
53
54 static unsigned getVecPolicyOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
55 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags));
56 (void)MCID;
57 return getLastNonGlueOrChainOpIdx(Node);
58}
59
60 void RISCVDAGToDAGISel::PreprocessISelDAG() {
61 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
62
63 bool MadeChange = false;
64 while (Position != CurDAG->allnodes_begin()) {
65 SDNode *N = &*--Position;
66 if (N->use_empty())
67 continue;
68
69 SDValue Result;
70 switch (N->getOpcode()) {
71 case ISD::SPLAT_VECTOR: {
72 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
73 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
74 MVT VT = N->getSimpleValueType(0);
75 unsigned Opc =
76 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
77 SDLoc DL(N);
78 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
79 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
80 N->getOperand(0), VL);
81 break;
82 }
83 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
84 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
85 // load. Done after lowering and combining so that we have a chance to
86 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
87 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
88 MVT VT = N->getSimpleValueType(0);
89 SDValue Passthru = N->getOperand(0);
90 SDValue Lo = N->getOperand(1);
91 SDValue Hi = N->getOperand(2);
92 SDValue VL = N->getOperand(3);
93 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
94 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
95 "Unexpected VTs!");
96 MachineFunction &MF = CurDAG->getMachineFunction();
97 RISCVMachineFunctionInfo *FuncInfo =
98 MF.getInfo<RISCVMachineFunctionInfo>();
99 SDLoc DL(N);
100
101 // We use the same frame index we use for moving two i32s into 64-bit FPR.
102 // This is an analogous operation.
103 int FI = FuncInfo->getMoveF64FrameIndex(MF);
104 MachinePointerInfo MPI =
105 MachinePointerInfo::getFixedStack(MF, FI);
106 SDValue StackSlot =
107 CurDAG->getFrameIndex(FI, TLI->getPointerTy(CurDAG->getDataLayout()));
108
109 SDValue Chain = CurDAG->getEntryNode();
110 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
111
112 SDValue OffsetSlot =
113 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
114 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
115 Align(8));
116
117 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
118
119 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
120 SDValue IntID =
121 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
122 SDValue Ops[] = {Chain,
123 IntID,
124 Passthru,
125 StackSlot,
126 CurDAG->getRegister(RISCV::X0, MVT::i64),
127 VL};
128
129 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
130 MVT::i64, MPI, Align(8),
131 MachineMemOperand::MOLoad);
132 break;
133 }
134 }
135
136 if (Result) {
137 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
138 LLVM_DEBUG(N->dump(CurDAG));
139 LLVM_DEBUG(dbgs() << "\nNew: ");
140 LLVM_DEBUG(Result->dump(CurDAG));
141 LLVM_DEBUG(dbgs() << "\n");
142
143 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
144 MadeChange = true;
145 }
146 }
147
148 if (MadeChange)
149 CurDAG->RemoveDeadNodes();
150}
151
152 void RISCVDAGToDAGISel::PostprocessISelDAG() {
153 HandleSDNode Dummy(CurDAG->getRoot());
154 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
155
156 bool MadeChange = false;
157 while (Position != CurDAG->allnodes_begin()) {
158 SDNode *N = &*--Position;
159 // Skip dead nodes and any non-machine opcodes.
160 if (N->use_empty() || !N->isMachineOpcode())
161 continue;
162
163 MadeChange |= doPeepholeSExtW(N);
164 MadeChange |= doPeepholeMaskedRVV(N);
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 MadeChange |= doPeepholeMergeVVMFold();
170
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
173}
174
175 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202}
203
204static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq =
207 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
208
209 return selectImmSeq(CurDAG, DL, VT, Seq);
210}
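// Illustrative example of the selectImm/selectImmSeq path (register choice is
// arbitrary): on RV64, Imm = 0x12345678 yields the RISCVMatInt sequence
//   LUI   a0, 0x12345
//   ADDIW a0, a0, 0x678
// and selectImmSeq chains the resulting machine nodes, starting from X0.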
211
212 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
213 unsigned NF, RISCVII::VLMUL LMUL) {
214 static const unsigned M1TupleRegClassIDs[] = {
215 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
216 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
217 RISCV::VRN8M1RegClassID};
218 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
219 RISCV::VRN3M2RegClassID,
220 RISCV::VRN4M2RegClassID};
221
222 assert(Regs.size() >= 2 && Regs.size() <= 8);
223
224 unsigned RegClassID;
225 unsigned SubReg0;
226 switch (LMUL) {
227 default:
228 llvm_unreachable("Invalid LMUL.");
229 case RISCVII::VLMUL::LMUL_F8:
230 case RISCVII::VLMUL::LMUL_F4:
231 case RISCVII::VLMUL::LMUL_F2:
232 case RISCVII::VLMUL::LMUL_1:
233 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
234 "Unexpected subreg numbering");
235 SubReg0 = RISCV::sub_vrm1_0;
236 RegClassID = M1TupleRegClassIDs[NF - 2];
237 break;
238 case RISCVII::VLMUL::LMUL_2:
239 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
240 "Unexpected subreg numbering");
241 SubReg0 = RISCV::sub_vrm2_0;
242 RegClassID = M2TupleRegClassIDs[NF - 2];
243 break;
244 case RISCVII::VLMUL::LMUL_4:
245 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
246 "Unexpected subreg numbering");
247 SubReg0 = RISCV::sub_vrm4_0;
248 RegClassID = RISCV::VRN2M4RegClassID;
249 break;
250 }
251
252 SDLoc DL(Regs[0]);
253 SmallVector<SDValue, 8> Ops;
254
255 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
256
257 for (unsigned I = 0; I < Regs.size(); ++I) {
258 Ops.push_back(Regs[I]);
259 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
260 }
261 SDNode *N =
262 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
263 return SDValue(N, 0);
264}
265
266 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
267 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
268 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
269 bool IsLoad, MVT *IndexVT) {
270 SDValue Chain = Node->getOperand(0);
271 SDValue Glue;
272
273 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
274
275 if (IsStridedOrIndexed) {
276 Operands.push_back(Node->getOperand(CurOp++)); // Index.
277 if (IndexVT)
278 *IndexVT = Operands.back()->getSimpleValueType(0);
279 }
280
281 if (IsMasked) {
282 // Mask needs to be copied to V0.
283 SDValue Mask = Node->getOperand(CurOp++);
284 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
285 Glue = Chain.getValue(1);
286 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
287 }
288 SDValue VL;
289 selectVLOp(Node->getOperand(CurOp++), VL);
290 Operands.push_back(VL);
291
292 MVT XLenVT = Subtarget->getXLenVT();
293 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
294 Operands.push_back(SEWOp);
295
296 // Masked load has the tail policy argument.
297 if (IsMasked && IsLoad) {
298 // Policy must be a constant.
299 uint64_t Policy = Node->getConstantOperandVal(CurOp++);
300 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
301 Operands.push_back(PolicyOp);
302 }
303
304 Operands.push_back(Chain); // Chain.
305 if (Glue)
306 Operands.push_back(Glue);
307}
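// Sketch of the operand list built above (callers push any merge/passthru
// tuple first): base pointer, stride or index (if any), mask copied to V0 (if
// masked), VL, SEW, policy (masked loads only), chain, and glue when a mask
// copy was emitted.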
308
309static bool isAllUndef(ArrayRef<SDValue> Values) {
310 return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
311}
312
313void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
314 bool IsStrided) {
315 SDLoc DL(Node);
316 unsigned NF = Node->getNumValues() - 1;
317 MVT VT = Node->getSimpleValueType(0);
318 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
319 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
320
321 unsigned CurOp = 2;
322 SmallVector<SDValue, 8> Operands;
323
324 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
325 Node->op_begin() + CurOp + NF);
326 bool IsTU = IsMasked || !isAllUndef(Regs);
327 if (IsTU) {
328 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
329 Operands.push_back(Merge);
330 }
331 CurOp += NF;
332
333 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
334 Operands, /*IsLoad=*/true);
335
336 const RISCV::VLSEGPseudo *P =
337 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
338 static_cast<unsigned>(LMUL));
339 MachineSDNode *Load =
340 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
341
342 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
343 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
344
345 SDValue SuperReg = SDValue(Load, 0);
346 for (unsigned I = 0; I < NF; ++I) {
347 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
348 ReplaceUses(SDValue(Node, I),
349 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
350 }
351
352 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
353 CurDAG->RemoveDeadNode(Node);
354}
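// In short: an NF-field segment load becomes one pseudo producing an
// MVT::Untyped tuple register; each field is then peeled off with an
// extract_subreg via getSubregIndexByMVT, and the chain is taken from the
// pseudo's second result.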
355
356void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
357 SDLoc DL(Node);
358 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
359 MVT VT = Node->getSimpleValueType(0);
360 MVT XLenVT = Subtarget->getXLenVT();
361 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
362 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
363
364 unsigned CurOp = 2;
365 SmallVector<SDValue, 7> Operands;
366
367 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
368 Node->op_begin() + CurOp + NF);
369 bool IsTU = IsMasked || !isAllUndef(Regs);
370 if (IsTU) {
371 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
372 Operands.push_back(MaskedOff);
373 }
374 CurOp += NF;
375
376 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
377 /*IsStridedOrIndexed*/ false, Operands,
378 /*IsLoad=*/true);
379
380 const RISCV::VLSEGPseudo *P =
381 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
382 Log2SEW, static_cast<unsigned>(LMUL));
383 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
384 XLenVT, MVT::Other, Operands);
385
386 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
387 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
388
389 SDValue SuperReg = SDValue(Load, 0);
390 for (unsigned I = 0; I < NF; ++I) {
391 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
392 ReplaceUses(SDValue(Node, I),
393 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
394 }
395
396 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
397 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
398 CurDAG->RemoveDeadNode(Node);
399}
400
401void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
402 bool IsOrdered) {
403 SDLoc DL(Node);
404 unsigned NF = Node->getNumValues() - 1;
405 MVT VT = Node->getSimpleValueType(0);
406 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
407 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
408
409 unsigned CurOp = 2;
410 SmallVector<SDValue, 8> Operands;
411
412 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
413 Node->op_begin() + CurOp + NF);
414 bool IsTU = IsMasked || !isAllUndef(Regs);
415 if (IsTU) {
416 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
417 Operands.push_back(MaskedOff);
418 }
419 CurOp += NF;
420
421 MVT IndexVT;
422 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
423 /*IsStridedOrIndexed*/ true, Operands,
424 /*IsLoad=*/true, &IndexVT);
425
426 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
427 "Element count mismatch");
428
429 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
430 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
431 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
432 report_fatal_error("The V extension does not support EEW=64 for index "
433 "values when XLEN=32");
434 }
435 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
436 NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
437 static_cast<unsigned>(IndexLMUL));
438 MachineSDNode *Load =
439 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
440
441 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
442 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
443
444 SDValue SuperReg = SDValue(Load, 0);
445 for (unsigned I = 0; I < NF; ++I) {
446 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
447 ReplaceUses(SDValue(Node, I),
448 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
449 }
450
451 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
452 CurDAG->RemoveDeadNode(Node);
453}
454
455void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
456 bool IsStrided) {
457 SDLoc DL(Node);
458 unsigned NF = Node->getNumOperands() - 4;
459 if (IsStrided)
460 NF--;
461 if (IsMasked)
462 NF--;
463 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
464 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
465 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
466 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
467 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
468
469 SmallVector<SDValue, 8> Operands;
470 Operands.push_back(StoreVal);
471 unsigned CurOp = 2 + NF;
472
473 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
474 Operands);
475
476 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
477 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
478 MachineSDNode *Store =
479 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
480
481 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
482 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
483
484 ReplaceNode(Node, Store);
485}
486
487void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
488 bool IsOrdered) {
489 SDLoc DL(Node);
490 unsigned NF = Node->getNumOperands() - 5;
491 if (IsMasked)
492 --NF;
493 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
494 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
495 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
496 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
497 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
498
499 SmallVector<SDValue, 8> Operands;
500 Operands.push_back(StoreVal);
501 unsigned CurOp = 2 + NF;
502
503 MVT IndexVT;
504 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
505 /*IsStridedOrIndexed*/ true, Operands,
506 /*IsLoad=*/false, &IndexVT);
507
508 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
509 "Element count mismatch");
510
511 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
512 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
513 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
514 report_fatal_error("The V extension does not support EEW=64 for index "
515 "values when XLEN=32");
516 }
517 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
518 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
519 static_cast<unsigned>(IndexLMUL));
520 MachineSDNode *Store =
521 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
522
523 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
524 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
525
526 ReplaceNode(Node, Store);
527}
528
529 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
530 if (!Subtarget->hasVInstructions())
531 return;
532
533 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
534
535 SDLoc DL(Node);
536 MVT XLenVT = Subtarget->getXLenVT();
537
538 unsigned IntNo = Node->getConstantOperandVal(0);
539
540 assert((IntNo == Intrinsic::riscv_vsetvli ||
541 IntNo == Intrinsic::riscv_vsetvlimax) &&
542 "Unexpected vsetvli intrinsic");
543
544 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
545 unsigned Offset = (VLMax ? 1 : 2);
546
547 assert(Node->getNumOperands() == Offset + 2 &&
548 "Unexpected number of operands");
549
550 unsigned SEW =
551 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
552 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
553 Node->getConstantOperandVal(Offset + 1) & 0x7);
554
555 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
556 /*MaskAgnostic*/ true);
557 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
558
559 SmallVector<EVT, 2> VTs = {XLenVT};
560
561 SDValue VLOperand;
562 unsigned Opcode = RISCV::PseudoVSETVLI;
563 if (VLMax) {
564 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
565 Opcode = RISCV::PseudoVSETVLIX0;
566 } else {
567 VLOperand = Node->getOperand(1);
568
569 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
570 uint64_t AVL = C->getZExtValue();
571 if (isUInt<5>(AVL)) {
572 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
573 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
574 ReplaceNode(
575 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
576 return;
577 }
578 }
579 }
580
581 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
582
583 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
584}
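// For instance (illustrative values): riscv_vsetvli with a constant AVL of 8,
// SEW=32 and LMUL=2 passes the isUInt<5> check and is selected to
// PseudoVSETIVLI with the AVL as an immediate and a VTYPE immediate encoding
// {e32, m2, ta, ma}; a non-constant AVL falls through to PseudoVSETVLI.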
585
586 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
587 MVT VT = Node->getSimpleValueType(0);
588 unsigned Opcode = Node->getOpcode();
589 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
590 "Unexpected opcode");
591 SDLoc DL(Node);
592
593 // For operations of the form (x << C1) op C2, check if we can use
594 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
595 SDValue N0 = Node->getOperand(0);
596 SDValue N1 = Node->getOperand(1);
597
598 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
599 if (!Cst)
600 return false;
601
602 int64_t Val = Cst->getSExtValue();
603
604 // Check if immediate can already use ANDI/ORI/XORI.
605 if (isInt<12>(Val))
606 return false;
607
608 SDValue Shift = N0;
609
610 // If Val is simm32 and we have a sext_inreg from i32, then the binop
611 // produces at least 33 sign bits. We can peek through the sext_inreg and use
612 // a SLLIW at the end.
613 bool SignExt = false;
614 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
615 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
616 SignExt = true;
617 Shift = N0.getOperand(0);
618 }
619
620 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
621 return false;
622
623 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
624 if (!ShlCst)
625 return false;
626
627 uint64_t ShAmt = ShlCst->getZExtValue();
628
629 // Make sure that we don't change the operation by removing bits.
630 // This only matters for OR and XOR, AND is unaffected.
631 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
632 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
633 return false;
634
635 int64_t ShiftedVal = Val >> ShAmt;
636 if (!isInt<12>(ShiftedVal))
637 return false;
638
639 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
640 if (SignExt && ShAmt >= 32)
641 return false;
642
643 // Ok, we can reorder to get a smaller immediate.
644 unsigned BinOpc;
645 switch (Opcode) {
646 default: llvm_unreachable("Unexpected opcode");
647 case ISD::AND: BinOpc = RISCV::ANDI; break;
648 case ISD::OR: BinOpc = RISCV::ORI; break;
649 case ISD::XOR: BinOpc = RISCV::XORI; break;
650 }
651
652 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
653
654 SDNode *BinOp =
655 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
656 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
657 SDNode *SLLI =
658 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
659 CurDAG->getTargetConstant(ShAmt, DL, VT));
660 ReplaceNode(Node, SLLI);
661 return true;
662}
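// Worked example (illustrative): (or (shl X, 4), 0x1230) -- 0x1230 does not
// fit in a simm12, but 0x1230 >> 4 == 0x123 does and no set bits are lost, so
// the node is rebuilt as (slli (ori X, 0x123), 4).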
663
664 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
665 // Only supported with XTHeadBb at the moment.
666 if (!Subtarget->hasVendorXTHeadBb())
667 return false;
668
669 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
670 if (!N1C)
671 return false;
672
673 SDValue N0 = Node->getOperand(0);
674 if (!N0.hasOneUse())
675 return false;
676
677 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
678 MVT VT) {
679 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
680 CurDAG->getTargetConstant(Msb, DL, VT),
681 CurDAG->getTargetConstant(Lsb, DL, VT));
682 };
683
684 SDLoc DL(Node);
685 MVT VT = Node->getSimpleValueType(0);
686 const unsigned RightShAmt = N1C->getZExtValue();
687
688 // Transform (sra (shl X, C1) C2) with C1 < C2
689 // -> (TH.EXT X, msb, lsb)
690 if (N0.getOpcode() == ISD::SHL) {
691 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
692 if (!N01C)
693 return false;
694
695 const unsigned LeftShAmt = N01C->getZExtValue();
696 // Make sure that this is a bitfield extraction (i.e., the shift-right
697 // amount can not be less than the left-shift).
698 if (LeftShAmt > RightShAmt)
699 return false;
700
701 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
702 const unsigned Msb = MsbPlusOne - 1;
703 const unsigned Lsb = RightShAmt - LeftShAmt;
704
705 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
706 ReplaceNode(Node, TH_EXT);
707 return true;
708 }
709
710 // Transform (sra (sext_inreg X, _), C) ->
711 // (TH.EXT X, msb, lsb)
712 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
713 unsigned ExtSize =
714 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
715
716 // ExtSize of 32 should use sraiw via tablegen pattern.
717 if (ExtSize == 32)
718 return false;
719
720 const unsigned Msb = ExtSize - 1;
721 const unsigned Lsb = RightShAmt;
722
723 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724 ReplaceNode(Node, TH_EXT);
725 return true;
726 }
727
728 return false;
729}
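// Illustrative case for XLEN=64: (sra (shl X, 40), 48) is a sign-extended
// extract of bits [23:8], so it becomes TH.EXT X, 23, 8
// (Msb = 64 - 40 - 1, Lsb = 48 - 40).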
730
731 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
732 // Target does not support indexed loads.
733 if (!Subtarget->hasVendorXTHeadMemIdx())
734 return false;
735
736 LoadSDNode *Ld = cast<LoadSDNode>(Node);
737 ISD::MemIndexedMode AM = Ld->getAddressingMode();
738 if (AM == ISD::UNINDEXED)
739 return false;
740
741 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
742 if (!C)
743 return false;
744
745 EVT LoadVT = Ld->getMemoryVT();
746 bool IsPre = (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
747 bool IsPost = (AM == ISD::POST_INC || AM == ISD::POST_DEC);
748 int64_t Offset = C->getSExtValue();
749
750 // Convert decrements to increments by a negative quantity.
751 if (AM == ISD::PRE_DEC || AM == ISD::POST_DEC)
752 Offset = -Offset;
753
754 // The constants that can be encoded in the THeadMemIdx instructions
755 // are of the form (sign_extend(imm5) << imm2).
756 int64_t Shift;
757 for (Shift = 0; Shift < 4; Shift++)
758 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
759 break;
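// Worked example (illustrative): Offset = 40 is accepted with Shift = 2 and
// imm5 = 10 (10 << 2 == 40); Offset = 100 has no (imm5 << imm2) form and is
// rejected below.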
760
761 // Constant cannot be encoded.
762 if (Shift == 4)
763 return false;
764
765 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
766 unsigned Opcode;
767 if (LoadVT == MVT::i8 && IsPre)
768 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
769 else if (LoadVT == MVT::i8 && IsPost)
770 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
771 else if (LoadVT == MVT::i16 && IsPre)
772 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
773 else if (LoadVT == MVT::i16 && IsPost)
774 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
775 else if (LoadVT == MVT::i32 && IsPre)
776 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
777 else if (LoadVT == MVT::i32 && IsPost)
778 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
779 else if (LoadVT == MVT::i64 && IsPre)
780 Opcode = RISCV::TH_LDIB;
781 else if (LoadVT == MVT::i64 && IsPost)
782 Opcode = RISCV::TH_LDIA;
783 else
784 return false;
785
786 EVT Ty = Ld->getOffset().getValueType();
787 SDValue Ops[] = {Ld->getBasePtr(),
788 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
789 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
790 Ld->getChain()};
791 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
792 Ld->getValueType(1), MVT::Other, Ops);
793
794 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
795 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
796
797 ReplaceNode(Node, New);
798
799 return true;
800}
801
802 void RISCVDAGToDAGISel::Select(SDNode *Node) {
803 // If we have a custom node, we have already selected.
804 if (Node->isMachineOpcode()) {
805 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
806 Node->setNodeId(-1);
807 return;
808 }
809
810 // Instruction Selection not handled by the auto-generated tablegen selection
811 // should be handled here.
812 unsigned Opcode = Node->getOpcode();
813 MVT XLenVT = Subtarget->getXLenVT();
814 SDLoc DL(Node);
815 MVT VT = Node->getSimpleValueType(0);
816
817 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
818
819 switch (Opcode) {
820 case ISD::Constant: {
821 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
822 auto *ConstNode = cast<ConstantSDNode>(Node);
823 if (ConstNode->isZero()) {
824 SDValue New =
825 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
826 ReplaceNode(Node, New.getNode());
827 return;
828 }
829 int64_t Imm = ConstNode->getSExtValue();
830 // If the upper XLen-16 bits are not used, try to convert this to a simm12
831 // by sign extending bit 15.
832 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
833 hasAllHUsers(Node))
834 Imm = SignExtend64<16>(Imm);
835 // If the upper 32-bits are not used try to convert this into a simm32 by
836 // sign extending bit 32.
837 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
838 Imm = SignExtend64<32>(Imm);
839
840 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
841 return;
842 }
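// Illustrative: Imm = 0xFFFF whose users only read the low 16 bits becomes -1
// after sign extending bit 15, which materializes as a single ADDI/C.LI
// instead of LUI+ADDI.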
843 case ISD::ConstantFP: {
844 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
845 int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
846 APF, VT);
847 if (FPImm >= 0) {
848 unsigned Opc;
849 switch (VT.SimpleTy) {
850 default:
851 llvm_unreachable("Unexpected size");
852 case MVT::f16:
853 Opc = RISCV::FLI_H;
854 break;
855 case MVT::f32:
856 Opc = RISCV::FLI_S;
857 break;
858 case MVT::f64:
859 Opc = RISCV::FLI_D;
860 break;
861 }
862
863 SDNode *Res = CurDAG->getMachineNode(
864 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
865 ReplaceNode(Node, Res);
866 return;
867 }
868
869 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
870 SDValue Imm;
871 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
872 // create an integer immediate.
873 if (APF.isPosZero() || NegZeroF64)
874 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
875 else
876 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
877 *Subtarget);
878
879 unsigned Opc;
880 switch (VT.SimpleTy) {
881 default:
882 llvm_unreachable("Unexpected size");
883 case MVT::f16:
884 Opc =
885 Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
886 break;
887 case MVT::f32:
888 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
889 break;
890 case MVT::f64:
891 // For RV32, we can't move from a GPR, we need to convert instead. This
892 // should only happen for +0.0 and -0.0.
893 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
894 bool HasZdinx = Subtarget->hasStdExtZdinx();
895 if (Subtarget->is64Bit())
896 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
897 else
898 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
899 break;
900 }
901
902 SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
903
904 // For f64 -0.0, we need to insert a fneg.d idiom.
905 if (NegZeroF64)
906 Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0),
907 SDValue(Res, 0));
908
909 ReplaceNode(Node, Res);
910 return;
911 }
912 case RISCVISD::SplitF64: {
913 if (!Subtarget->hasStdExtZfa())
914 break;
915 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
916 "Unexpected subtarget");
917
918 // With Zfa, lower to fmv.x.w and fmvh.x.d.
919 if (!SDValue(Node, 0).use_empty()) {
920 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
921 Node->getOperand(0));
922 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
923 }
924 if (!SDValue(Node, 1).use_empty()) {
925 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
926 Node->getOperand(0));
927 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
928 }
929
930 CurDAG->RemoveDeadNode(Node);
931 return;
932 }
933 case ISD::SHL: {
934 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
935 if (!N1C)
936 break;
937 SDValue N0 = Node->getOperand(0);
938 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
939 !isa<ConstantSDNode>(N0.getOperand(1)))
940 break;
941 unsigned ShAmt = N1C->getZExtValue();
942 uint64_t Mask = N0.getConstantOperandVal(1);
943
944 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
945 // 32 leading zeros and C3 trailing zeros.
946 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
947 unsigned XLen = Subtarget->getXLen();
948 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
949 unsigned TrailingZeros = llvm::countr_zero(Mask);
950 if (TrailingZeros > 0 && LeadingZeros == 32) {
951 SDNode *SRLIW = CurDAG->getMachineNode(
952 RISCV::SRLIW, DL, VT, N0->getOperand(0),
953 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
954 SDNode *SLLI = CurDAG->getMachineNode(
955 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
956 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
957 ReplaceNode(Node, SLLI);
958 return;
959 }
960 }
961 break;
962 }
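// Numeric example (illustrative, XLEN=64): C2 = 0xFFFFFF00 has 32 leading and
// 8 trailing zeros, so (shl (and X, 0xFFFFFF00), 3) is rebuilt as
// (slli (srliw X, 8), 11) and the AND constant never has to be materialized.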
963 case ISD::SRL: {
964 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
965 if (!N1C)
966 break;
967 SDValue N0 = Node->getOperand(0);
968 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
969 break;
970 unsigned ShAmt = N1C->getZExtValue();
971 uint64_t Mask = N0.getConstantOperandVal(1);
972
973 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
974 // 32 leading zeros and C3 trailing zeros.
975 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
976 unsigned XLen = Subtarget->getXLen();
977 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
978 unsigned TrailingZeros = llvm::countr_zero(Mask);
979 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
980 SDNode *SRLIW = CurDAG->getMachineNode(
981 RISCV::SRLIW, DL, VT, N0->getOperand(0),
982 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
983 SDNode *SLLI = CurDAG->getMachineNode(
984 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
985 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
986 ReplaceNode(Node, SLLI);
987 return;
988 }
989 }
990
991 // Optimize (srl (and X, C2), C) ->
992 // (srli (slli X, XLen-C3), (XLen-C3) + C)
993 // Where C2 is a mask with C3 trailing ones.
994 // Taking into account that the C2 may have had lower bits unset by
995 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
996 // This pattern occurs when type legalizing right shifts for types with
997 // less than XLen bits.
998 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
999 if (!isMask_64(Mask))
1000 break;
1001 unsigned TrailingOnes = llvm::countr_one(Mask);
1002 if (ShAmt >= TrailingOnes)
1003 break;
1004 // If the mask has 32 trailing ones, use SRLIW.
1005 if (TrailingOnes == 32) {
1006 SDNode *SRLIW =
1007 CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, N0->getOperand(0),
1008 CurDAG->getTargetConstant(ShAmt, DL, VT));
1009 ReplaceNode(Node, SRLIW);
1010 return;
1011 }
1012
1013 // Only do the remaining transforms if the AND has one use.
1014 if (!N0.hasOneUse())
1015 break;
1016
1017 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1018 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1019 SDNode *BEXTI = CurDAG->getMachineNode(
1020 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1021 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1022 ReplaceNode(Node, BEXTI);
1023 return;
1024 }
1025
1026 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1027 SDNode *SLLI =
1028 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1029 CurDAG->getTargetConstant(LShAmt, DL, VT));
1030 SDNode *SRLI = CurDAG->getMachineNode(
1031 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1032 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1033 ReplaceNode(Node, SRLI);
1034 return;
1035 }
1036 case ISD::SRA: {
1037 if (trySignedBitfieldExtract(Node))
1038 return;
1039
1040 // Optimize (sra (sext_inreg X, i16), C) ->
1041 // (srai (slli X, XLen-16), (XLen-16) + C)
1042 // And (sra (sext_inreg X, i8), C) ->
1043 // (srai (slli X, XLen-8), (XLen-8) + C)
1044 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1045 // This transform matches the code we get without Zbb. The shifts are more
1046 // compressible, and this can help expose CSE opportunities in the sdiv by
1047 // constant optimization.
1048 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1049 if (!N1C)
1050 break;
1051 SDValue N0 = Node->getOperand(0);
1052 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1053 break;
1054 unsigned ShAmt = N1C->getZExtValue();
1055 unsigned ExtSize =
1056 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1057 // ExtSize of 32 should use sraiw via tablegen pattern.
1058 if (ExtSize >= 32 || ShAmt >= ExtSize)
1059 break;
1060 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1061 SDNode *SLLI =
1062 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1063 CurDAG->getTargetConstant(LShAmt, DL, VT));
1064 SDNode *SRAI = CurDAG->getMachineNode(
1065 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1066 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1067 ReplaceNode(Node, SRAI);
1068 return;
1069 }
1070 case ISD::OR:
1071 case ISD::XOR:
1072 if (tryShrinkShlLogicImm(Node))
1073 return;
1074
1075 break;
1076 case ISD::AND: {
1077 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1078 if (!N1C)
1079 break;
1080 uint64_t C1 = N1C->getZExtValue();
1081 const bool isC1Mask = isMask_64(C1);
1082 const bool isC1ANDI = isInt<12>(C1);
1083
1084 SDValue N0 = Node->getOperand(0);
1085
1086 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1087 SDValue X, unsigned Msb,
1088 unsigned Lsb) {
1089 if (!Subtarget->hasVendorXTHeadBb())
1090 return false;
1091
1092 SDNode *TH_EXTU = CurDAG->getMachineNode(
1093 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1094 CurDAG->getTargetConstant(Lsb, DL, VT));
1095 ReplaceNode(Node, TH_EXTU);
1096 return true;
1097 };
1098
1099 bool LeftShift = N0.getOpcode() == ISD::SHL;
1100 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1101 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1102 if (!C)
1103 break;
1104 unsigned C2 = C->getZExtValue();
1105 unsigned XLen = Subtarget->getXLen();
1106 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1107
1108 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1109 // shift pair might offer more compression opportunities.
1110 // TODO: We could check for C extension here, but we don't have many lit
1111 // tests with the C extension enabled so not checking gets better
1112 // coverage.
1113 // TODO: What if ANDI is faster than the shift?
1114 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1115
1116 // Clear irrelevant bits in the mask.
1117 if (LeftShift)
1118 C1 &= maskTrailingZeros<uint64_t>(C2);
1119 else
1120 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1121
1122 // Some transforms should only be done if the shift has a single use or
1123 // the AND would become (srli (slli X, 32), 32)
1124 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1125
1126 SDValue X = N0.getOperand(0);
1127
1128 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1129 // with c3 leading zeros.
1130 if (!LeftShift && isC1Mask) {
1131 unsigned Leading = XLen - llvm::bit_width(C1);
1132 if (C2 < Leading) {
1133 // If the number of leading zeros is C2+32 this can be SRLIW.
1134 if (C2 + 32 == Leading) {
1135 SDNode *SRLIW = CurDAG->getMachineNode(
1136 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1137 ReplaceNode(Node, SRLIW);
1138 return;
1139 }
1140
1141 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1142 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1143 //
1144 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1145 // legalized and goes through DAG combine.
1146 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1147 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1148 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1149 SDNode *SRAIW =
1150 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1151 CurDAG->getTargetConstant(31, DL, VT));
1152 SDNode *SRLIW = CurDAG->getMachineNode(
1153 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1154 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1155 ReplaceNode(Node, SRLIW);
1156 return;
1157 }
1158
1159 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1160 // available.
1161 // Transform (and (srl x, C2), C1)
1162 // -> (<bfextract> x, msb, lsb)
1163 //
1164 // Make sure to keep this below the SRLIW cases, as we always want to
1165 // prefer the more common instruction.
1166 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1167 const unsigned Lsb = C2;
1168 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1169 return;
1170
1171 // (srli (slli x, c3-c2), c3).
1172 // Skip if we could use (zext.w (sraiw X, C2)).
1173 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1174 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1175 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1176 // Also Skip if we can use bexti or th.tst.
1177 Skip |= HasBitTest && Leading == XLen - 1;
1178 if (OneUseOrZExtW && !Skip) {
1179 SDNode *SLLI = CurDAG->getMachineNode(
1180 RISCV::SLLI, DL, VT, X,
1181 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1182 SDNode *SRLI = CurDAG->getMachineNode(
1183 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1184 CurDAG->getTargetConstant(Leading, DL, VT));
1185 ReplaceNode(Node, SRLI);
1186 return;
1187 }
1188 }
1189 }
1190
1191 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1192 // shifted by c2 bits with c3 leading zeros.
1193 if (LeftShift && isShiftedMask_64(C1)) {
1194 unsigned Leading = XLen - llvm::bit_width(C1);
1195
1196 if (C2 + Leading < XLen &&
1197 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1198 // Use slli.uw when possible.
1199 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1200 SDNode *SLLI_UW =
1201 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1202 CurDAG->getTargetConstant(C2, DL, VT));
1203 ReplaceNode(Node, SLLI_UW);
1204 return;
1205 }
1206
1207 // (srli (slli c2+c3), c3)
1208 if (OneUseOrZExtW && !IsCANDI) {
1209 SDNode *SLLI = CurDAG->getMachineNode(
1210 RISCV::SLLI, DL, VT, X,
1211 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1212 SDNode *SRLI = CurDAG->getMachineNode(
1213 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1214 CurDAG->getTargetConstant(Leading, DL, VT));
1215 ReplaceNode(Node, SRLI);
1216 return;
1217 }
1218 }
1219 }
1220
1221 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1222 // shifted mask with c2 leading zeros and c3 trailing zeros.
1223 if (!LeftShift && isShiftedMask_64(C1)) {
1224 unsigned Leading = XLen - llvm::bit_width(C1);
1225 unsigned Trailing = llvm::countr_zero(C1);
1226 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1227 !IsCANDI) {
1228 unsigned SrliOpc = RISCV::SRLI;
1229 // If the input is zexti32 we should use SRLIW.
1230 if (X.getOpcode() == ISD::AND &&
1231 isa<ConstantSDNode>(X.getOperand(1)) &&
1232 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1233 SrliOpc = RISCV::SRLIW;
1234 X = X.getOperand(0);
1235 }
1236 SDNode *SRLI = CurDAG->getMachineNode(
1237 SrliOpc, DL, VT, X,
1238 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1239 SDNode *SLLI = CurDAG->getMachineNode(
1240 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1241 CurDAG->getTargetConstant(Trailing, DL, VT));
1242 ReplaceNode(Node, SLLI);
1243 return;
1244 }
1245 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1246 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1247 OneUseOrZExtW && !IsCANDI) {
1248 SDNode *SRLIW = CurDAG->getMachineNode(
1249 RISCV::SRLIW, DL, VT, X,
1250 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1251 SDNode *SLLI = CurDAG->getMachineNode(
1252 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1253 CurDAG->getTargetConstant(Trailing, DL, VT));
1254 ReplaceNode(Node, SLLI);
1255 return;
1256 }
1257 }
1258
1259 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1260 // shifted mask with no leading zeros and c3 trailing zeros.
1261 if (LeftShift && isShiftedMask_64(C1)) {
1262 unsigned Leading = XLen - llvm::bit_width(C1);
1263 unsigned Trailing = llvm::countr_zero(C1);
1264 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1265 SDNode *SRLI = CurDAG->getMachineNode(
1266 RISCV::SRLI, DL, VT, X,
1267 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1268 SDNode *SLLI = CurDAG->getMachineNode(
1269 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1270 CurDAG->getTargetConstant(Trailing, DL, VT));
1271 ReplaceNode(Node, SLLI);
1272 return;
1273 }
1274 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1275 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1276 SDNode *SRLIW = CurDAG->getMachineNode(
1277 RISCV::SRLIW, DL, VT, X,
1278 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1279 SDNode *SLLI = CurDAG->getMachineNode(
1280 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1281 CurDAG->getTargetConstant(Trailing, DL, VT));
1282 ReplaceNode(Node, SLLI);
1283 return;
1284 }
1285 }
1286 }
1287
1288 // If C1 masks off the upper bits only (but can't be formed as an
1289 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1290 // available.
1291 // Transform (and x, C1)
1292 // -> (<bfextract> x, msb, lsb)
1293 if (isC1Mask && !isC1ANDI) {
1294 const unsigned Msb = llvm::bit_width(C1) - 1;
1295 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1296 return;
1297 }
1298
1299 if (tryShrinkShlLogicImm(Node))
1300 return;
1301
1302 break;
1303 }
1304 case ISD::MUL: {
1305 // Special case for calculating (mul (and X, C2), C1) where the full product
1306 // fits in XLen bits. We can shift X left by the number of leading zeros in
1307 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1308 // product has XLen trailing zeros, putting it in the output of MULHU. This
1309 // can avoid materializing a constant in a register for C2.
1310
1311 // RHS should be a constant.
1312 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1313 if (!N1C || !N1C->hasOneUse())
1314 break;
1315
1316 // LHS should be an AND with constant.
1317 SDValue N0 = Node->getOperand(0);
1318 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1319 break;
1320
1321 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
1322
1323 // Constant should be a mask.
1324 if (!isMask_64(C2))
1325 break;
1326
1327 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1328 // multiple users or the constant is a simm12. This prevents inserting a
1329 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1330 // make it more costly to materialize. Otherwise, using a SLLI might allow
1331 // it to be compressed.
1332 bool IsANDIOrZExt =
1333 isInt<12>(C2) ||
1334 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1335 // With XTHeadBb, we can use TH.EXTU.
1336 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1337 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1338 break;
1339 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1340 // the constant is a simm32.
1341 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1342 // With XTHeadBb, we can use TH.EXTU.
1343 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1344 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1345 break;
1346
1347 // We need to shift left the AND input and C1 by a total of XLen bits.
1348
1349 // How far left do we need to shift the AND input?
1350 unsigned XLen = Subtarget->getXLen();
1351 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1352
1353 // The constant gets shifted by the remaining amount unless that would
1354 // shift bits out.
1355 uint64_t C1 = N1C->getZExtValue();
1356 unsigned ConstantShift = XLen - LeadingZeros;
1357 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1358 break;
1359
1360 uint64_t ShiftedC1 = C1 << ConstantShift;
1361 // If this is RV32, we need to sign extend the constant.
1362 if (XLen == 32)
1363 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1364
1365 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1366 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1367 SDNode *SLLI =
1368 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1369 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1370 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1371 SDValue(SLLI, 0), SDValue(Imm, 0));
1372 ReplaceNode(Node, MULHU);
1373 return;
1374 }
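// Numeric sketch (illustrative, XLEN=64): (mul (and X, 0xFF), 3000) -- C2 has
// 56 leading zeros, so this becomes (mulhu (slli X, 56), 3000 << 8); the full
// product lands in the upper XLen bits and MULHU returns (X & 0xFF) * 3000.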
1375 case ISD::LOAD: {
1376 if (tryIndexedLoad(Node))
1377 return;
1378 break;
1379 }
1380 case ISD::INTRINSIC_WO_CHAIN: {
1381 unsigned IntNo = Node->getConstantOperandVal(0);
1382 switch (IntNo) {
1383 // By default we do not custom select any intrinsic.
1384 default:
1385 break;
1386 case Intrinsic::riscv_vmsgeu:
1387 case Intrinsic::riscv_vmsge: {
1388 SDValue Src1 = Node->getOperand(1);
1389 SDValue Src2 = Node->getOperand(2);
1390 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1391 bool IsCmpUnsignedZero = false;
1392 // Only custom select scalar second operand.
1393 if (Src2.getValueType() != XLenVT)
1394 break;
1395 // Small constants are handled with patterns.
1396 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1397 int64_t CVal = C->getSExtValue();
1398 if (CVal >= -15 && CVal <= 16) {
1399 if (!IsUnsigned || CVal != 0)
1400 break;
1401 IsCmpUnsignedZero = true;
1402 }
1403 }
1404 MVT Src1VT = Src1.getSimpleValueType();
1405 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1406 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1407 default:
1408 llvm_unreachable("Unexpected LMUL!");
1409#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1410 case RISCVII::VLMUL::lmulenum: \
1411 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1412 : RISCV::PseudoVMSLT_VX_##suffix; \
1413 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1414 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1415 break;
1416 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1417 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1418 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1419 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1420 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1421 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1422 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1423#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1424 }
1425 SDValue SEW = CurDAG->getTargetConstant(
1426 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1427 SDValue VL;
1428 selectVLOp(Node->getOperand(3), VL);
1429
1430 // If vmsgeu with 0 immediate, expand it to vmset.
1431 if (IsCmpUnsignedZero) {
1432 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1433 return;
1434 }
1435
1436 // Expand to
1437 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1438 SDValue Cmp = SDValue(
1439 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1440 0);
1441 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1442 {Cmp, Cmp, VL, SEW}));
1443 return;
1444 }
1445 case Intrinsic::riscv_vmsgeu_mask:
1446 case Intrinsic::riscv_vmsge_mask: {
1447 SDValue Src1 = Node->getOperand(2);
1448 SDValue Src2 = Node->getOperand(3);
1449 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1450 bool IsCmpUnsignedZero = false;
1451 // Only custom select scalar second operand.
1452 if (Src2.getValueType() != XLenVT)
1453 break;
1454 // Small constants are handled with patterns.
1455 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1456 int64_t CVal = C->getSExtValue();
1457 if (CVal >= -15 && CVal <= 16) {
1458 if (!IsUnsigned || CVal != 0)
1459 break;
1460 IsCmpUnsignedZero = true;
1461 }
1462 }
1463 MVT Src1VT = Src1.getSimpleValueType();
1464 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1465 VMOROpcode;
1466 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1467 default:
1468 llvm_unreachable("Unexpected LMUL!");
1469#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1470 case RISCVII::VLMUL::lmulenum: \
1471 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1472 : RISCV::PseudoVMSLT_VX_##suffix; \
1473 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1474 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1475 break;
1476 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1477 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1478 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1479 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1480 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1481 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1482 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1483#undef CASE_VMSLT_OPCODES
1484 }
1485 // Mask operations use the LMUL from the mask type.
1486 switch (RISCVTargetLowering::getLMUL(VT)) {
1487 default:
1488 llvm_unreachable("Unexpected LMUL!");
1489#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1490 case RISCVII::VLMUL::lmulenum: \
1491 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1492 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1493 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1494 break;
1495 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1496 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1497 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1498 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1499 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1500 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1501 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1502#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1503 }
1504 SDValue SEW = CurDAG->getTargetConstant(
1505 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1506 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1507 SDValue VL;
1508 selectVLOp(Node->getOperand(5), VL);
1509 SDValue MaskedOff = Node->getOperand(1);
1510 SDValue Mask = Node->getOperand(4);
1511
1512 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1513 if (IsCmpUnsignedZero) {
1514 // We don't need vmor if the MaskedOff and the Mask are the same
1515 // value.
1516 if (Mask == MaskedOff) {
1517 ReplaceUses(Node, Mask.getNode());
1518 return;
1519 }
1520 ReplaceNode(Node,
1521 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1522 {Mask, MaskedOff, VL, MaskSEW}));
1523 return;
1524 }
1525
1526 // If the MaskedOff value and the Mask are the same value use
1527 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1528 // This avoids needing to copy v0 to vd before starting the next sequence.
1529 if (Mask == MaskedOff) {
1530 SDValue Cmp = SDValue(
1531 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1532 0);
1533 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1534 {Mask, Cmp, VL, MaskSEW}));
1535 return;
1536 }
1537
1538 // Mask needs to be copied to V0.
1539 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1540 RISCV::V0, Mask, SDValue());
1541 SDValue Glue = Chain.getValue(1);
1542 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1543
1544 // Otherwise use
1545 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1546 // The result is mask undisturbed.
1547 // We use the same instructions to emulate mask agnostic behavior, because
1548 // the agnostic result can be either undisturbed or all 1.
1549 SDValue Cmp = SDValue(
1550 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1551 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1552 0);
1553 // vmxor.mm vd, vd, v0 is used to update active value.
1554 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1555 {Cmp, Mask, VL, MaskSEW}));
1556 return;
1557 }
1558 case Intrinsic::riscv_vsetvli:
1559 case Intrinsic::riscv_vsetvlimax:
1560 return selectVSETVLI(Node);
1561 }
1562 break;
1563 }
1564 case ISD::INTRINSIC_W_CHAIN: {
1565 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1566 switch (IntNo) {
1567 // By default we do not custom select any intrinsic.
1568 default:
1569 break;
1570 case Intrinsic::riscv_vlseg2:
1571 case Intrinsic::riscv_vlseg3:
1572 case Intrinsic::riscv_vlseg4:
1573 case Intrinsic::riscv_vlseg5:
1574 case Intrinsic::riscv_vlseg6:
1575 case Intrinsic::riscv_vlseg7:
1576 case Intrinsic::riscv_vlseg8: {
1577 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1578 return;
1579 }
1580 case Intrinsic::riscv_vlseg2_mask:
1581 case Intrinsic::riscv_vlseg3_mask:
1582 case Intrinsic::riscv_vlseg4_mask:
1583 case Intrinsic::riscv_vlseg5_mask:
1584 case Intrinsic::riscv_vlseg6_mask:
1585 case Intrinsic::riscv_vlseg7_mask:
1586 case Intrinsic::riscv_vlseg8_mask: {
1587 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1588 return;
1589 }
1590 case Intrinsic::riscv_vlsseg2:
1591 case Intrinsic::riscv_vlsseg3:
1592 case Intrinsic::riscv_vlsseg4:
1593 case Intrinsic::riscv_vlsseg5:
1594 case Intrinsic::riscv_vlsseg6:
1595 case Intrinsic::riscv_vlsseg7:
1596 case Intrinsic::riscv_vlsseg8: {
1597 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1598 return;
1599 }
1600 case Intrinsic::riscv_vlsseg2_mask:
1601 case Intrinsic::riscv_vlsseg3_mask:
1602 case Intrinsic::riscv_vlsseg4_mask:
1603 case Intrinsic::riscv_vlsseg5_mask:
1604 case Intrinsic::riscv_vlsseg6_mask:
1605 case Intrinsic::riscv_vlsseg7_mask:
1606 case Intrinsic::riscv_vlsseg8_mask: {
1607 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1608 return;
1609 }
1610 case Intrinsic::riscv_vloxseg2:
1611 case Intrinsic::riscv_vloxseg3:
1612 case Intrinsic::riscv_vloxseg4:
1613 case Intrinsic::riscv_vloxseg5:
1614 case Intrinsic::riscv_vloxseg6:
1615 case Intrinsic::riscv_vloxseg7:
1616 case Intrinsic::riscv_vloxseg8:
1617 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1618 return;
1619 case Intrinsic::riscv_vluxseg2:
1620 case Intrinsic::riscv_vluxseg3:
1621 case Intrinsic::riscv_vluxseg4:
1622 case Intrinsic::riscv_vluxseg5:
1623 case Intrinsic::riscv_vluxseg6:
1624 case Intrinsic::riscv_vluxseg7:
1625 case Intrinsic::riscv_vluxseg8:
1626 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1627 return;
1628 case Intrinsic::riscv_vloxseg2_mask:
1629 case Intrinsic::riscv_vloxseg3_mask:
1630 case Intrinsic::riscv_vloxseg4_mask:
1631 case Intrinsic::riscv_vloxseg5_mask:
1632 case Intrinsic::riscv_vloxseg6_mask:
1633 case Intrinsic::riscv_vloxseg7_mask:
1634 case Intrinsic::riscv_vloxseg8_mask:
1635 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1636 return;
1637 case Intrinsic::riscv_vluxseg2_mask:
1638 case Intrinsic::riscv_vluxseg3_mask:
1639 case Intrinsic::riscv_vluxseg4_mask:
1640 case Intrinsic::riscv_vluxseg5_mask:
1641 case Intrinsic::riscv_vluxseg6_mask:
1642 case Intrinsic::riscv_vluxseg7_mask:
1643 case Intrinsic::riscv_vluxseg8_mask:
1644 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1645 return;
1646 case Intrinsic::riscv_vlseg8ff:
1647 case Intrinsic::riscv_vlseg7ff:
1648 case Intrinsic::riscv_vlseg6ff:
1649 case Intrinsic::riscv_vlseg5ff:
1650 case Intrinsic::riscv_vlseg4ff:
1651 case Intrinsic::riscv_vlseg3ff:
1652 case Intrinsic::riscv_vlseg2ff: {
1653 selectVLSEGFF(Node, /*IsMasked*/ false);
1654 return;
1655 }
1656 case Intrinsic::riscv_vlseg8ff_mask:
1657 case Intrinsic::riscv_vlseg7ff_mask:
1658 case Intrinsic::riscv_vlseg6ff_mask:
1659 case Intrinsic::riscv_vlseg5ff_mask:
1660 case Intrinsic::riscv_vlseg4ff_mask:
1661 case Intrinsic::riscv_vlseg3ff_mask:
1662 case Intrinsic::riscv_vlseg2ff_mask: {
1663 selectVLSEGFF(Node, /*IsMasked*/ true);
1664 return;
1665 }
1666 case Intrinsic::riscv_vloxei:
1667 case Intrinsic::riscv_vloxei_mask:
1668 case Intrinsic::riscv_vluxei:
1669 case Intrinsic::riscv_vluxei_mask: {
1670 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1671 IntNo == Intrinsic::riscv_vluxei_mask;
1672 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1673 IntNo == Intrinsic::riscv_vloxei_mask;
1674
1675 MVT VT = Node->getSimpleValueType(0);
1676 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1677
1678 unsigned CurOp = 2;
1679 // Masked intrinsics only have TU versions of the pseudo instructions.
1680 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
1681 SmallVector<SDValue, 8> Operands;
1682 if (IsTU)
1683 Operands.push_back(Node->getOperand(CurOp++));
1684 else
1685 // Skip the undef passthru operand for nomask TA version pseudo
1686 CurOp++;
1687
1688 MVT IndexVT;
1689 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1690 /*IsStridedOrIndexed*/ true, Operands,
1691 /*IsLoad=*/true, &IndexVT);
1692
1694 "Element count mismatch");
1695
1696 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1697 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1698 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1699 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1700 report_fatal_error("The V extension does not support EEW=64 for index "
1701 "values when XLEN=32");
1702 }
1703 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1704 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1705 static_cast<unsigned>(IndexLMUL));
1706 MachineSDNode *Load =
1707 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1708
1709 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1710 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1711
1712 ReplaceNode(Node, Load);
1713 return;
1714 }
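// The plain unit-stride and strided loads below follow the same recipe as the
// indexed loads above: pick the TA or TU form based on the passthru operand,
// then look up the VLE pseudo by element width and LMUL. As a sketch
// (illustrative types, intrinsic name mangling elided), a strided load such as
//   %v = call <vscale x 2 x i32> @llvm.riscv.vlse(<vscale x 2 x i32> undef,
//                                                 ptr %p, i64 %stride, i64 %vl)
// would select to an unmasked PseudoVLSE32 variant with the stride in a GPR.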
1715 case Intrinsic::riscv_vlm:
1716 case Intrinsic::riscv_vle:
1717 case Intrinsic::riscv_vle_mask:
1718 case Intrinsic::riscv_vlse:
1719 case Intrinsic::riscv_vlse_mask: {
1720 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1721 IntNo == Intrinsic::riscv_vlse_mask;
1722 bool IsStrided =
1723 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1724
1725 MVT VT = Node->getSimpleValueType(0);
1726 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1727
1728 unsigned CurOp = 2;
1729 // The riscv_vlm intrinsic is always tail agnostic and has no passthru operand.
1730 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1731 // Masked intrinsics only have TU version pseudo instructions.
1732 bool IsTU = HasPassthruOperand &&
1733 (IsMasked || !Node->getOperand(CurOp).isUndef());
1734 SmallVector<SDValue, 8> Operands;
1735 if (IsTU)
1736 Operands.push_back(Node->getOperand(CurOp++));
1737 else if (HasPassthruOperand)
1738 // Skip the undef passthru operand for nomask TA version pseudo
1739 CurOp++;
1740
1741 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1742 Operands, /*IsLoad=*/true);
1743
1744 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1745 const RISCV::VLEPseudo *P =
1746 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
1747 static_cast<unsigned>(LMUL));
1748 MachineSDNode *Load =
1749 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1750
1751 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1752 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1753
1754 ReplaceNode(Node, Load);
1755 return;
1756 }
1757 case Intrinsic::riscv_vleff:
1758 case Intrinsic::riscv_vleff_mask: {
1759 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1760
1761 MVT VT = Node->getSimpleValueType(0);
1762 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1763
1764 unsigned CurOp = 2;
1765 // Masked intrinsics only have TU version pseudo instructions.
1766 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
1767 SmallVector<SDValue, 7> Operands;
1768 if (IsTU)
1769 Operands.push_back(Node->getOperand(CurOp++));
1770 else
1771 // Skip the undef passthru operand for nomask TA version pseudo
1772 CurOp++;
1773
1774 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1775 /*IsStridedOrIndexed*/ false, Operands,
1776 /*IsLoad=*/true);
1777
1778 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1779 const RISCV::VLEPseudo *P =
1780 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
1781 Log2SEW, static_cast<unsigned>(LMUL));
1782 MachineSDNode *Load = CurDAG->getMachineNode(
1783 P->Pseudo, DL, Node->getVTList(), Operands);
1784 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1785 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1786
1787 ReplaceNode(Node, Load);
1788 return;
1789 }
1790 }
1791 break;
1792 }
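// ISD::INTRINSIC_VOID covers the store-side RVV intrinsics. The handling below
// mirrors the loads above, except there is no passthru/TU decision to make and
// the value being stored is pushed as the first pseudo operand.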
1793 case ISD::INTRINSIC_VOID: {
1794 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1795 switch (IntNo) {
1796 case Intrinsic::riscv_vsseg2:
1797 case Intrinsic::riscv_vsseg3:
1798 case Intrinsic::riscv_vsseg4:
1799 case Intrinsic::riscv_vsseg5:
1800 case Intrinsic::riscv_vsseg6:
1801 case Intrinsic::riscv_vsseg7:
1802 case Intrinsic::riscv_vsseg8: {
1803 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1804 return;
1805 }
1806 case Intrinsic::riscv_vsseg2_mask:
1807 case Intrinsic::riscv_vsseg3_mask:
1808 case Intrinsic::riscv_vsseg4_mask:
1809 case Intrinsic::riscv_vsseg5_mask:
1810 case Intrinsic::riscv_vsseg6_mask:
1811 case Intrinsic::riscv_vsseg7_mask:
1812 case Intrinsic::riscv_vsseg8_mask: {
1813 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1814 return;
1815 }
1816 case Intrinsic::riscv_vssseg2:
1817 case Intrinsic::riscv_vssseg3:
1818 case Intrinsic::riscv_vssseg4:
1819 case Intrinsic::riscv_vssseg5:
1820 case Intrinsic::riscv_vssseg6:
1821 case Intrinsic::riscv_vssseg7:
1822 case Intrinsic::riscv_vssseg8: {
1823 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1824 return;
1825 }
1826 case Intrinsic::riscv_vssseg2_mask:
1827 case Intrinsic::riscv_vssseg3_mask:
1828 case Intrinsic::riscv_vssseg4_mask:
1829 case Intrinsic::riscv_vssseg5_mask:
1830 case Intrinsic::riscv_vssseg6_mask:
1831 case Intrinsic::riscv_vssseg7_mask:
1832 case Intrinsic::riscv_vssseg8_mask: {
1833 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1834 return;
1835 }
1836 case Intrinsic::riscv_vsoxseg2:
1837 case Intrinsic::riscv_vsoxseg3:
1838 case Intrinsic::riscv_vsoxseg4:
1839 case Intrinsic::riscv_vsoxseg5:
1840 case Intrinsic::riscv_vsoxseg6:
1841 case Intrinsic::riscv_vsoxseg7:
1842 case Intrinsic::riscv_vsoxseg8:
1843 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1844 return;
1845 case Intrinsic::riscv_vsuxseg2:
1846 case Intrinsic::riscv_vsuxseg3:
1847 case Intrinsic::riscv_vsuxseg4:
1848 case Intrinsic::riscv_vsuxseg5:
1849 case Intrinsic::riscv_vsuxseg6:
1850 case Intrinsic::riscv_vsuxseg7:
1851 case Intrinsic::riscv_vsuxseg8:
1852 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1853 return;
1854 case Intrinsic::riscv_vsoxseg2_mask:
1855 case Intrinsic::riscv_vsoxseg3_mask:
1856 case Intrinsic::riscv_vsoxseg4_mask:
1857 case Intrinsic::riscv_vsoxseg5_mask:
1858 case Intrinsic::riscv_vsoxseg6_mask:
1859 case Intrinsic::riscv_vsoxseg7_mask:
1860 case Intrinsic::riscv_vsoxseg8_mask:
1861 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1862 return;
1863 case Intrinsic::riscv_vsuxseg2_mask:
1864 case Intrinsic::riscv_vsuxseg3_mask:
1865 case Intrinsic::riscv_vsuxseg4_mask:
1866 case Intrinsic::riscv_vsuxseg5_mask:
1867 case Intrinsic::riscv_vsuxseg6_mask:
1868 case Intrinsic::riscv_vsuxseg7_mask:
1869 case Intrinsic::riscv_vsuxseg8_mask:
1870 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1871 return;
1872 case Intrinsic::riscv_vsoxei:
1873 case Intrinsic::riscv_vsoxei_mask:
1874 case Intrinsic::riscv_vsuxei:
1875 case Intrinsic::riscv_vsuxei_mask: {
1876 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1877 IntNo == Intrinsic::riscv_vsuxei_mask;
1878 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1879 IntNo == Intrinsic::riscv_vsoxei_mask;
1880
1881 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1882 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1883
1884 unsigned CurOp = 2;
1885 SmallVector<SDValue, 8> Operands;
1886 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1887
1888 MVT IndexVT;
1889 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1890 /*IsStridedOrIndexed*/ true, Operands,
1891 /*IsLoad=*/false, &IndexVT);
1892
1894 "Element count mismatch");
1895
1896 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1897 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1898 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1899 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1900 report_fatal_error("The V extension does not support EEW=64 for index "
1901 "values when XLEN=32");
1902 }
1903 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1904 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
1905 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1906 MachineSDNode *Store =
1907 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1908
1909 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1910 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1911
1912 ReplaceNode(Node, Store);
1913 return;
1914 }
1915 case Intrinsic::riscv_vsm:
1916 case Intrinsic::riscv_vse:
1917 case Intrinsic::riscv_vse_mask:
1918 case Intrinsic::riscv_vsse:
1919 case Intrinsic::riscv_vsse_mask: {
1920 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
1921 IntNo == Intrinsic::riscv_vsse_mask;
1922 bool IsStrided =
1923 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
1924
1925 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1926 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1927
1928 unsigned CurOp = 2;
1929 SmallVector<SDValue, 8> Operands;
1930 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1931
1932 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1933 Operands);
1934
1935 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1936 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
1937 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
1938 MachineSDNode *Store =
1939 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1940 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1941 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1942
1943 ReplaceNode(Node, Store);
1944 return;
1945 }
1946 }
1947 break;
1948 }
1949 case ISD::BITCAST: {
1950 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
1951 // Just drop bitcasts between vectors if both are fixed or both are
1952 // scalable.
1953 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
1954 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
1955 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
1956 CurDAG->RemoveDeadNode(Node);
1957 return;
1958 }
1959 break;
1960 }
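// INSERT_SUBVECTOR/EXTRACT_SUBVECTOR at an index that lines up with a vector
// register boundary reduces to a subregister operation. As a rough example,
// inserting an LMUL=1 subvector at index 0 of an LMUL=2 register group becomes
// an INSERT_SUBREG of that group's first VR subregister; indices that don't
// align fall through to generic selection.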
1961 case ISD::INSERT_SUBVECTOR: {
1962 SDValue V = Node->getOperand(0);
1963 SDValue SubV = Node->getOperand(1);
1964 SDLoc DL(SubV);
1965 auto Idx = Node->getConstantOperandVal(2);
1966 MVT SubVecVT = SubV.getSimpleValueType();
1967
1968 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1969 MVT SubVecContainerVT = SubVecVT;
1970 // Establish the correct scalable-vector types for any fixed-length type.
1971 if (SubVecVT.isFixedLengthVector())
1972 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
1973 if (VT.isFixedLengthVector())
1974 VT = TLI.getContainerForFixedLengthVector(VT);
1975
1976 const auto *TRI = Subtarget->getRegisterInfo();
1977 unsigned SubRegIdx;
1978 std::tie(SubRegIdx, Idx) =
1979 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1980 VT, SubVecContainerVT, Idx, TRI);
1981
1982 // If the Idx hasn't been completely eliminated then this is a subvector
1983 // insert which doesn't naturally align to a vector register. These must
1984 // be handled using instructions to manipulate the vector registers.
1985 if (Idx != 0)
1986 break;
1987
1988 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
1989 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
1990 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
1991 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
1992 (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
1993 assert((!IsSubVecPartReg || V.isUndef()) &&
1994 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
1995 "the subvector is smaller than a full-sized register");
1996
1997 // If we haven't set a SubRegIdx, then we must be going between
1998 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
1999 if (SubRegIdx == RISCV::NoSubRegister) {
2000 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
2001 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2002 InRegClassID &&
2003 "Unexpected subvector extraction");
2004 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2005 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2006 DL, VT, SubV, RC);
2007 ReplaceNode(Node, NewNode);
2008 return;
2009 }
2010
2011 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2012 ReplaceNode(Node, Insert.getNode());
2013 return;
2014 }
2015 case ISD::EXTRACT_SUBVECTOR: {
2016 SDValue V = Node->getOperand(0);
2017 auto Idx = Node->getConstantOperandVal(1);
2018 MVT InVT = V.getSimpleValueType();
2019 SDLoc DL(V);
2020
2021 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2022 MVT SubVecContainerVT = VT;
2023 // Establish the correct scalable-vector types for any fixed-length type.
2024 if (VT.isFixedLengthVector())
2025 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2026 if (InVT.isFixedLengthVector())
2027 InVT = TLI.getContainerForFixedLengthVector(InVT);
2028
2029 const auto *TRI = Subtarget->getRegisterInfo();
2030 unsigned SubRegIdx;
2031 std::tie(SubRegIdx, Idx) =
2032 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2033 InVT, SubVecContainerVT, Idx, TRI);
2034
2035 // If the Idx hasn't been completely eliminated then this is a subvector
2036 // extract which doesn't naturally align to a vector register. These must
2037 // be handled using instructions to manipulate the vector registers.
2038 if (Idx != 0)
2039 break;
2040
2041 // If we haven't set a SubRegIdx, then we must be going between
2042 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2043 if (SubRegIdx == RISCV::NoSubRegister) {
2044 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2045 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2046 InRegClassID &&
2047 "Unexpected subvector extraction");
2048 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2049 SDNode *NewNode =
2050 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2051 ReplaceNode(Node, NewNode);
2052 return;
2053 }
2054
2055 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2056 ReplaceNode(Node, Extract.getNode());
2057 return;
2058 }
2059 case RISCVISD::VMV_S_X_VL:
2060 case RISCVISD::VFMV_S_F_VL:
2061 case RISCVISD::VMV_V_X_VL:
2062 case RISCVISD::VFMV_V_F_VL: {
2063 // Try to match splat of a scalar load to a strided load with stride of x0.
2064 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2065 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2066 if (!Node->getOperand(0).isUndef())
2067 break;
2068 SDValue Src = Node->getOperand(1);
2069 auto *Ld = dyn_cast<LoadSDNode>(Src);
2070 if (!Ld)
2071 break;
2072 EVT MemVT = Ld->getMemoryVT();
2073 // The memory VT should be the same size as the element type.
2074 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2075 break;
2076 if (!IsProfitableToFold(Src, Node, Node) ||
2077 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2078 break;
2079
2080 SDValue VL;
2081 if (IsScalarMove) {
2082 // We could deal with more VL if we update the VSETVLI insert pass to
2083 // avoid introducing more VSETVLI.
2084 if (!isOneConstant(Node->getOperand(2)))
2085 break;
2086 selectVLOp(Node->getOperand(2), VL);
2087 } else
2088 selectVLOp(Node->getOperand(2), VL);
2089
2090 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2091 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2092
2093 // If VL=1, then we don't need to do a strided load and can just do a
2094 // regular load.
2095 bool IsStrided = !isOneConstant(VL);
2096
2097 // Only use a strided (stride = x0) load if the subtarget has optimized zero-stride vector loads.
2098 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2099 break;
2100
2101 SmallVector<SDValue> Operands = {Ld->getBasePtr()};
2102 if (IsStrided)
2103 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2104 Operands.append({VL, SEW, Ld->getChain()});
2105
2106 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2107 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2108 /*IsMasked*/ false, /*IsTU*/ false, IsStrided, /*FF*/ false,
2109 Log2SEW, static_cast<unsigned>(LMUL));
2110 MachineSDNode *Load =
2111 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2112 // Update the chain.
2113 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2114 // Record the mem-refs
2115 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2116 // Replace the splat with the vlse.
2117 ReplaceNode(Node, Load);
2118 return;
2119 }
2120 }
2121
2122 // Select the default instruction.
2123 SelectCode(Node);
2124}
2125
2126 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2127 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
2128 // Always produce a register and immediate operand, as expected by
2129 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2130 switch (ConstraintID) {
2131 case InlineAsm::Constraint_m: {
2132 SDValue Op0, Op1;
2133 bool Found = SelectAddrRegImm(Op, Op0, Op1);
2134 assert(Found && "SelectAddrRegImm should always succeed");
2135 (void)Found;
2136 OutOps.push_back(Op0);
2137 OutOps.push_back(Op1);
2138 return false;
2139 }
2140 case InlineAsm::Constraint_A:
2141 OutOps.push_back(Op);
2142 OutOps.push_back(
2143 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2144 return false;
2145 default:
2146 break;
2147 }
2148
2149 return true;
2150}
2151
2152 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2153 SDValue &Offset) {
2154 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2155 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2156 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2157 return true;
2158 }
2159
2160 return false;
2161}
2162
2163// Select a frame index and an optional immediate offset from an ADD or OR.
2164 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2165 SDValue &Offset) {
2166 if (SelectAddrFrameIndex(Addr, Base, Offset))
2167 return true;
2168
2170 return false;
2171
2172 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2173 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2174 if (isInt<12>(CVal)) {
2175 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2176 Subtarget->getXLenVT());
2177 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2178 Subtarget->getXLenVT());
2179 return true;
2180 }
2181 }
2182
2183 return false;
2184}
2185
2186// Fold constant addresses.
2187static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2188 const MVT VT, const RISCVSubtarget *Subtarget,
2189 SDValue Addr, SDValue &Base, SDValue &Offset) {
2190 if (!isa<ConstantSDNode>(Addr))
2191 return false;
2192
2193 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2194
2195 // If the constant is a simm12, we can fold the whole constant and use X0 as
2196 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2197 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
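// Worked example: for the constant 0x12345678, Lo12 = 0x678 and Hi =
// 0x12345000, so the address becomes LUI 0x12345 plus a 0x678 load/store
// offset. For 0x12345FFF the low 12 bits sign-extend to -1, so Hi becomes
// 0x12346000 and the folded offset is -1, which still sums to the original
// constant.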
2198 int64_t Lo12 = SignExtend64<12>(CVal);
2199 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2200 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2201 if (Hi) {
2202 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2203 Base = SDValue(
2204 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2205 CurDAG->getTargetConstant(Hi20, DL, VT)),
2206 0);
2207 } else {
2208 Base = CurDAG->getRegister(RISCV::X0, VT);
2209 }
2210 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2211 return true;
2212 }
2213
2214 // Ask how constant materialization would handle this constant.
2215 RISCVMatInt::InstSeq Seq =
2216 RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
2217
2218 // If the last instruction would be an ADDI, we can fold its immediate and
2219 // emit the rest of the sequence as the base.
2220 if (Seq.back().getOpcode() != RISCV::ADDI)
2221 return false;
2222 Lo12 = Seq.back().getImm();
2223
2224 // Drop the last instruction.
2225 Seq.pop_back();
2226 assert(!Seq.empty() && "Expected more instructions in sequence");
2227
2228 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2229 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2230 return true;
2231}
2232
2233// Is this ADD instruction only used as the base pointer of scalar loads and
2234// stores?
2235 static bool isWorthFoldingAdd(SDValue Add) {
2236 for (auto *Use : Add->uses()) {
2237 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2238 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2239 Use->getOpcode() != ISD::ATOMIC_STORE)
2240 return false;
2241 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2242 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2243 VT != MVT::f64)
2244 return false;
2245 // Don't allow stores of the value. It must be used as the address.
2246 if (Use->getOpcode() == ISD::STORE &&
2247 cast<StoreSDNode>(Use)->getValue() == Add)
2248 return false;
2249 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2250 cast<AtomicSDNode>(Use)->getVal() == Add)
2251 return false;
2252 }
2253
2254 return true;
2255}
2256
2257 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2258 unsigned MaxShiftAmount,
2259 SDValue &Base, SDValue &Index,
2260 SDValue &Scale) {
2261 EVT VT = Addr.getSimpleValueType();
2262 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2263 SDValue &Shift) {
2264 uint64_t ShiftAmt = 0;
2265 Index = N;
2266
2267 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2268 // Only match shifts by a value in range [0, MaxShiftAmount].
2269 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2270 Index = N.getOperand(0);
2271 ShiftAmt = N.getConstantOperandVal(1);
2272 }
2273 }
2274
2275 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2276 return ShiftAmt != 0;
2277 };
2278
2279 if (Addr.getOpcode() == ISD::ADD) {
2280 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2281 SDValue AddrB = Addr.getOperand(0);
2282 if (AddrB.getOpcode() == ISD::ADD &&
2283 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2284 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2285 isInt<12>(C1->getSExtValue())) {
2286 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
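// e.g. (add (add (shl %idx, 2), %base), 16) is selected as
// Base = (ADDI %base, 16), Index = %idx, Scale = 2, folding the constant into
// the base while keeping the scaled index for the reg+reg form.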
2287 SDValue C1Val =
2288 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2289 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2290 AddrB.getOperand(1), C1Val),
2291 0);
2292 return true;
2293 }
2294 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2295 Base = Addr.getOperand(1);
2296 return true;
2297 } else {
2298 UnwrapShl(Addr.getOperand(1), Index, Scale);
2299 Base = Addr.getOperand(0);
2300 return true;
2301 }
2302 } else if (UnwrapShl(Addr, Index, Scale)) {
2303 EVT VT = Addr.getValueType();
2304 Base = CurDAG->getRegister(RISCV::X0, VT);
2305 return true;
2306 }
2307
2308 return false;
2309}
2310
2311 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2312 SDValue &Offset, bool IsINX) {
2313 if (SelectAddrFrameIndex(Addr, Base, Offset))
2314 return true;
2315
2316 SDLoc DL(Addr);
2317 MVT VT = Addr.getSimpleValueType();
2318
2319 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2320 Base = Addr.getOperand(0);
2321 Offset = Addr.getOperand(1);
2322 return true;
2323 }
2324
2325 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2326 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2327 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2328 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2329 Base = Addr.getOperand(0);
2330 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2331 SDValue LoOperand = Base.getOperand(1);
2332 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2333 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2334 // (its low part, really), then we can rely on the alignment of that
2335 // variable to provide a margin of safety before low part can overflow
2336 // the 12 bits of the load/store offset. Check if CVal falls within
2337 // that margin; if so (low part + CVal) can't overflow.
2338 const DataLayout &DL = CurDAG->getDataLayout();
2339 Align Alignment = commonAlignment(
2340 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2341 if (CVal == 0 || Alignment > CVal) {
2342 int64_t CombinedOffset = CVal + GA->getOffset();
2343 Base = Base.getOperand(0);
2344 Offset = CurDAG->getTargetGlobalAddress(
2345 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2346 CombinedOffset, GA->getTargetFlags());
2347 return true;
2348 }
2349 }
2350 }
2351
2352 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2353 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2354 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2355 return true;
2356 }
2357 }
2358
2359 // Handle ADD with large immediates.
2360 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2361 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2362 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2363 "simm12 not already handled?");
2364
2365 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2366 // an ADDI for part of the offset and fold the rest into the load/store.
2367 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
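// e.g. an access at base+3000 is split into ADDI tmp, base, 2047 followed by a
// load/store offset of 953, since 2047 + 953 == 3000.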
2368 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2369 int64_t Adj = CVal < 0 ? -2048 : 2047;
2370 Base = SDValue(
2371 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2372 CurDAG->getTargetConstant(Adj, DL, VT)),
2373 0);
2374 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2375 return true;
2376 }
2377
2378 // For larger immediates, we might be able to save one instruction from
2379 // constant materialization by folding the Lo12 bits of the immediate into
2380 // the address. We should only do this if the ADD is only used by loads and
2381 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2382 // separately with the full materialized immediate creating extra
2383 // instructions.
2384 if (isWorthFoldingAdd(Addr) &&
2385 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2386 Offset)) {
2387 // Insert an ADD instruction with the materialized Hi52 bits.
2388 Base = SDValue(
2389 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2390 0);
2391 return true;
2392 }
2393 }
2394
2395 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2396 return true;
2397
2398 Base = Addr;
2399 Offset = CurDAG->getTargetConstant(0, DL, VT);
2400 return true;
2401}
2402
2403 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2404 SDValue &ShAmt) {
2405 ShAmt = N;
2406
2407 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2408 // amount. If there is an AND on the shift amount, we can bypass it if it
2409 // doesn't affect any of those bits.
2410 if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2411 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2412
2413 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2414 // mask that covers the bits needed to represent all shift amounts.
2415 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2416 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2417
2418 if (ShMask.isSubsetOf(AndMask)) {
2419 ShAmt = ShAmt.getOperand(0);
2420 } else {
2421 // SimplifyDemandedBits may have optimized the mask so try restoring any
2422 // bits that are known zero.
2423 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2424 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2425 return true;
2426 ShAmt = ShAmt.getOperand(0);
2427 }
2428 }
2429
2430 if (ShAmt.getOpcode() == ISD::ADD &&
2431 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2432 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2433 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2434 // to avoid the ADD.
2435 if (Imm != 0 && Imm % ShiftWidth == 0) {
2436 ShAmt = ShAmt.getOperand(0);
2437 return true;
2438 }
2439 } else if (ShAmt.getOpcode() == ISD::SUB &&
2440 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2441 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2442 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2443 // generate a NEG instead of a SUB of a constant.
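// e.g. on RV64, (srl X, (sub 64, Y)) can shift by (neg Y) instead: only the
// low 6 bits of the shift amount are read, and -Y == 64 - Y (mod 64).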
2444 if (Imm != 0 && Imm % ShiftWidth == 0) {
2445 SDLoc DL(ShAmt);
2446 EVT VT = ShAmt.getValueType();
2447 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2448 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2449 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2450 ShAmt.getOperand(1));
2451 ShAmt = SDValue(Neg, 0);
2452 return true;
2453 }
2454 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2455 // to generate a NOT instead of a SUB of a constant.
2456 if (Imm % ShiftWidth == ShiftWidth - 1) {
2457 SDLoc DL(ShAmt);
2458 EVT VT = ShAmt.getValueType();
2459 MachineSDNode *Not =
2460 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2461 CurDAG->getTargetConstant(-1, DL, VT));
2462 ShAmt = SDValue(Not, 0);
2463 return true;
2464 }
2465 }
2466
2467 return true;
2468}
2469
2470/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2471/// check for equality with 0. This function emits instructions that convert the
2472/// seteq/setne into something that can be compared with 0.
2473/// When \p Equal is false, we match setne. When \p Equal is true, we match
2474/// seteq.
2475 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2476 SDValue &Val) {
2477 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2478 "Unexpected condition code!");
2479
2480 // We're looking for a setcc.
2481 if (N->getOpcode() != ISD::SETCC)
2482 return false;
2483
2484 // Must be an equality comparison.
2485 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2486 if (CCVal != ExpectedCCVal)
2487 return false;
2488
2489 SDValue LHS = N->getOperand(0);
2490 SDValue RHS = N->getOperand(1);
2491
2492 if (!LHS.getValueType().isInteger())
2493 return false;
2494
2495 // If the RHS is 0, we don't need any extra instructions; return the LHS.
2496 if (isNullConstant(RHS)) {
2497 Val = LHS;
2498 return true;
2499 }
2500
2501 SDLoc DL(N);
2502
2503 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2504 int64_t CVal = C->getSExtValue();
2505 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2506 // non-zero otherwise.
2507 if (CVal == -2048) {
2508 Val =
2509 SDValue(CurDAG->getMachineNode(
2510 RISCV::XORI, DL, N->getValueType(0), LHS,
2511 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2512 0);
2513 return true;
2514 }
2515 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2516 // LHS is equal to the RHS and non-zero otherwise.
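// e.g. (seteq X, 1234) becomes (ADDI X, -1234), which is zero exactly when
// X == 1234, matching the compare-with-zero form described above.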
2517 if (isInt<12>(CVal) || CVal == 2048) {
2518 Val =
2519 SDValue(CurDAG->getMachineNode(
2520 RISCV::ADDI, DL, N->getValueType(0), LHS,
2521 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2522 0);
2523 return true;
2524 }
2525 }
2526
2527 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2528 // equal and a non-zero value if they aren't.
2529 Val = SDValue(
2530 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2531 return true;
2532}
2533
2534 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2535 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2536 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2537 Val = N.getOperand(0);
2538 return true;
2539 }
2540
2541 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2542 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2543 return N;
2544
2545 SDValue N0 = N.getOperand(0);
2546 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2547 N.getConstantOperandVal(1) == ShiftAmt &&
2548 N0.getConstantOperandVal(1) == ShiftAmt)
2549 return N0.getOperand(0);
2550
2551 return N;
2552 };
2553
2554 MVT VT = N.getSimpleValueType();
2555 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2556 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2557 return true;
2558 }
2559
2560 return false;
2561}
2562
2563 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2564 if (N.getOpcode() == ISD::AND) {
2565 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2566 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2567 Val = N.getOperand(0);
2568 return true;
2569 }
2570 }
2571 MVT VT = N.getSimpleValueType();
2572 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2573 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2574 Val = N;
2575 return true;
2576 }
2577
2578 return false;
2579}
2580
2581/// Look for various patterns that can be done with a SHL that can be folded
2582/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2583/// SHXADD we are trying to match.
2584 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2585 SDValue &Val) {
2586 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2587 SDValue N0 = N.getOperand(0);
2588
2589 bool LeftShift = N0.getOpcode() == ISD::SHL;
2590 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2591 isa<ConstantSDNode>(N0.getOperand(1))) {
2592 uint64_t Mask = N.getConstantOperandVal(1);
2593 unsigned C2 = N0.getConstantOperandVal(1);
2594
2595 unsigned XLen = Subtarget->getXLen();
2596 if (LeftShift)
2597 Mask &= maskTrailingZeros<uint64_t>(C2);
2598 else
2599 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2600
2601 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2602 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2603 // followed by a SHXADD with c3 for the X amount.
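// e.g. for SH3ADD (ShAmt == 3) on RV64, (and (shl y, 2), 0xFFFFFFFFFFFFFFF8)
// becomes (srli y, 1) feeding the SH3ADD, because the SRLI by (c3 - c2)
// followed by the SH3ADD's implicit shift-left-by-3 reproduces the original
// shift-and-mask.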
2604 if (isShiftedMask_64(Mask)) {
2605 unsigned Leading = XLen - llvm::bit_width(Mask);
2606 unsigned Trailing = llvm::countr_zero(Mask);
2607 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2608 SDLoc DL(N);
2609 EVT VT = N.getValueType();
2610 Val = SDValue(CurDAG->getMachineNode(
2611 RISCV::SRLI, DL, VT, N0.getOperand(0),
2612 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2613 0);
2614 return true;
2615 }
2616 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2617 // leading zeros and c3 trailing zeros. We can use an SRLI by C3
2618 // followed by a SHXADD using c3 for the X amount.
2619 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2620 SDLoc DL(N);
2621 EVT VT = N.getValueType();
2622 Val = SDValue(
2623 CurDAG->getMachineNode(
2624 RISCV::SRLI, DL, VT, N0.getOperand(0),
2625 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2626 0);
2627 return true;
2628 }
2629 }
2630 }
2631 }
2632
2633 bool LeftShift = N.getOpcode() == ISD::SHL;
2634 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2635 isa<ConstantSDNode>(N.getOperand(1))) {
2636 SDValue N0 = N.getOperand(0);
2637 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2638 isa<ConstantSDNode>(N0.getOperand(1))) {
2639 uint64_t Mask = N0.getConstantOperandVal(1);
2640 if (isShiftedMask_64(Mask)) {
2641 unsigned C1 = N.getConstantOperandVal(1);
2642 unsigned XLen = Subtarget->getXLen();
2643 unsigned Leading = XLen - llvm::bit_width(Mask);
2644 unsigned Trailing = llvm::countr_zero(Mask);
2645 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2646 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2647 if (LeftShift && Leading == 32 && Trailing > 0 &&
2648 (Trailing + C1) == ShAmt) {
2649 SDLoc DL(N);
2650 EVT VT = N.getValueType();
2651 Val = SDValue(CurDAG->getMachineNode(
2652 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2653 CurDAG->getTargetConstant(Trailing, DL, VT)),
2654 0);
2655 return true;
2656 }
2657 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2658 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2659 if (!LeftShift && Leading == 32 && Trailing > C1 &&
2660 (Trailing - C1) == ShAmt) {
2661 SDLoc DL(N);
2662 EVT VT = N.getValueType();
2663 Val = SDValue(CurDAG->getMachineNode(
2664 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2665 CurDAG->getTargetConstant(Trailing, DL, VT)),
2666 0);
2667 return true;
2668 }
2669 }
2670 }
2671 }
2672
2673 return false;
2674}
2675
2676/// Look for various patterns that can be done with a SHL that can be folded
2677/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2678/// SHXADD_UW we are trying to match.
2679 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
2680 SDValue &Val) {
2681 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2682 N.hasOneUse()) {
2683 SDValue N0 = N.getOperand(0);
2684 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2685 N0.hasOneUse()) {
2686 uint64_t Mask = N.getConstantOperandVal(1);
2687 unsigned C2 = N0.getConstantOperandVal(1);
2688
2689 Mask &= maskTrailingZeros<uint64_t>(C2);
2690
2691 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2692 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2693 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2694 if (isShiftedMask_64(Mask)) {
2695 unsigned Leading = llvm::countl_zero(Mask);
2696 unsigned Trailing = llvm::countr_zero(Mask);
2697 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2698 SDLoc DL(N);
2699 EVT VT = N.getValueType();
2700 Val = SDValue(CurDAG->getMachineNode(
2701 RISCV::SLLI, DL, VT, N0.getOperand(0),
2702 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2703 0);
2704 return true;
2705 }
2706 }
2707 }
2708 }
2709
2710 return false;
2711}
2712
2713// Return true if all users of this SDNode* only consume the lower \p Bits.
2714// This can be used to form W instructions for add/sub/mul/shl even when the
2715// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
2716// SimplifyDemandedBits has made it so some users see a sext_inreg and some
2717// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
2718// the add/sub/mul/shl to become non-W instructions. By checking the users we
2719// may be able to use a W instruction and CSE with the other instruction if
2720// this has happened. We could try to detect that the CSE opportunity exists
2721// before doing this, but that would be more complicated.
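// e.g. an ADD whose only user is an SW storing its result needs only the low
// 32 bits to be correct, so the ADD can be selected as ADDW and possibly CSE
// with an existing ADDW of the same operands.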
2722 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
2723 const unsigned Depth) const {
2724 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
2725 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
2726 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
2727 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
2728 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
2729 isa<ConstantSDNode>(Node) || Depth != 0) &&
2730 "Unexpected opcode");
2731
2732 if (Depth >= SelectionDAG::MaxRecursionDepth)
2733 return false;
2734
2735 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
2736 SDNode *User = *UI;
2737 // Users of this node should have already been instruction selected
2738 if (!User->isMachineOpcode())
2739 return false;
2740
2741 // TODO: Add more opcodes?
2742 switch (User->getMachineOpcode()) {
2743 default:
2744 return false;
2745 case RISCV::ADDW:
2746 case RISCV::ADDIW:
2747 case RISCV::SUBW:
2748 case RISCV::MULW:
2749 case RISCV::SLLW:
2750 case RISCV::SLLIW:
2751 case RISCV::SRAW:
2752 case RISCV::SRAIW:
2753 case RISCV::SRLW:
2754 case RISCV::SRLIW:
2755 case RISCV::DIVW:
2756 case RISCV::DIVUW:
2757 case RISCV::REMW:
2758 case RISCV::REMUW:
2759 case RISCV::ROLW:
2760 case RISCV::RORW:
2761 case RISCV::RORIW:
2762 case RISCV::CLZW:
2763 case RISCV::CTZW:
2764 case RISCV::CPOPW:
2765 case RISCV::SLLI_UW:
2766 case RISCV::FMV_W_X:
2767 case RISCV::FCVT_H_W:
2768 case RISCV::FCVT_H_WU:
2769 case RISCV::FCVT_S_W:
2770 case RISCV::FCVT_S_WU:
2771 case RISCV::FCVT_D_W:
2772 case RISCV::FCVT_D_WU:
2773 case RISCV::TH_REVW:
2774 case RISCV::TH_SRRIW:
2775 if (Bits < 32)
2776 return false;
2777 break;
2778 case RISCV::SLL:
2779 case RISCV::SRA:
2780 case RISCV::SRL:
2781 case RISCV::ROL:
2782 case RISCV::ROR:
2783 case RISCV::BSET:
2784 case RISCV::BCLR:
2785 case RISCV::BINV:
2786 // Shift amount operands only use log2(Xlen) bits.
2787 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
2788 return false;
2789 break;
2790 case RISCV::SLLI:
2791 // SLLI only uses the lower (XLen - ShAmt) bits.
2792 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
2793 return false;
2794 break;
2795 case RISCV::ANDI:
2796 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
2797 break;
2798 goto RecCheck;
2799 case RISCV::ORI: {
2800 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
2801 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
2802 break;
2803 [[fallthrough]];
2804 }
2805 case RISCV::AND:
2806 case RISCV::OR:
2807 case RISCV::XOR:
2808 case RISCV::XORI:
2809 case RISCV::ANDN:
2810 case RISCV::ORN:
2811 case RISCV::XNOR:
2812 case RISCV::SH1ADD:
2813 case RISCV::SH2ADD:
2814 case RISCV::SH3ADD:
2815 RecCheck:
2816 if (hasAllNBitUsers(User, Bits, Depth + 1))
2817 break;
2818 return false;
2819 case RISCV::SRLI: {
2820 unsigned ShAmt = User->getConstantOperandVal(1);
2821 // If we are shifting right by less than Bits, and users don't demand any
2822 // bits that were shifted into [Bits-1:0], then we can consider this as an
2823 // N-Bit user.
2824 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
2825 break;
2826 return false;
2827 }
2828 case RISCV::SEXT_B:
2829 case RISCV::PACKH:
2830 if (Bits < 8)
2831 return false;
2832 break;
2833 case RISCV::SEXT_H:
2834 case RISCV::FMV_H_X:
2835 case RISCV::ZEXT_H_RV32:
2836 case RISCV::ZEXT_H_RV64:
2837 case RISCV::PACKW:
2838 if (Bits < 16)
2839 return false;
2840 break;
2841 case RISCV::PACK:
2842 if (Bits < (Subtarget->getXLen() / 2))
2843 return false;
2844 break;
2845 case RISCV::ADD_UW:
2846 case RISCV::SH1ADD_UW:
2847 case RISCV::SH2ADD_UW:
2848 case RISCV::SH3ADD_UW:
2849 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
2850 // 32 bits.
2851 if (UI.getOperandNo() != 0 || Bits < 32)
2852 return false;
2853 break;
2854 case RISCV::SB:
2855 if (UI.getOperandNo() != 0 || Bits < 8)
2856 return false;
2857 break;
2858 case RISCV::SH:
2859 if (UI.getOperandNo() != 0 || Bits < 16)
2860 return false;
2861 break;
2862 case RISCV::SW:
2863 if (UI.getOperandNo() != 0 || Bits < 32)
2864 return false;
2865 break;
2866 }
2867 }
2868
2869 return true;
2870}
2871
2872// Select a constant that can be represented as (sign_extend(imm5) << imm2).
2874 SDValue &Shl2) {
2875 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
2876 int64_t Offset = C->getSExtValue();
2877 int64_t Shift;
2878 for (Shift = 0; Shift < 4; Shift++)
2879 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
2880 break;
2881
2882 // Constant cannot be encoded.
2883 if (Shift == 4)
2884 return false;
2885
2886 EVT Ty = N->getValueType(0);
2887 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
2888 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
2889 return true;
2890 }
2891
2892 return false;
2893}
2894
2895// Select VL as a 5 bit immediate or a value that will become a register. This
2896 // allows us to choose between VSETIVLI or VSETVLI later.
2897 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
2898 auto *C = dyn_cast<ConstantSDNode>(N);
2899 if (C && isUInt<5>(C->getZExtValue())) {
2900 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
2901 N->getValueType(0));
2902 } else if (C && C->isAllOnes()) {
2903 // Treat all ones as VLMax.
2904 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
2905 N->getValueType(0));
2906 } else if (isa<RegisterSDNode>(N) &&
2907 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
2908 // All our VL operands use an operand that allows GPRNoX0 or an immediate
2909 // as the register class. Convert X0 to a special immediate to pass the
2910 // MachineVerifier. This is recognized specially by the vsetvli insertion
2911 // pass.
2912 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
2913 N->getValueType(0));
2914 } else {
2915 VL = N;
2916 }
2917
2918 return true;
2919}
2920
2921 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
2922 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
2923 return false;
2924 assert(N.getNumOperands() == 3 && "Unexpected number of operands");
2925 SplatVal = N.getOperand(1);
2926 return true;
2927}
2928
2929using ValidateFn = bool (*)(int64_t);
2930
2930
2931 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
2932 SelectionDAG &DAG,
2933 const RISCVSubtarget &Subtarget,
2934 ValidateFn ValidateImm) {
2935 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2936 !isa<ConstantSDNode>(N.getOperand(1)))
2937 return false;
2938 assert(N.getNumOperands() == 3 && "Unexpected number of operands");
2939
2940 int64_t SplatImm =
2941 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2942
2943 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
2944 // type is wider than the resulting vector element type: an implicit
2945 // truncation first takes place. Therefore, perform a manual
2946 // truncation/sign-extension in order to ignore any truncated bits and catch
2947 // any zero-extended immediate.
2948 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
2949 // sign-extending to (XLenVT -1).
2950 MVT XLenVT = Subtarget.getXLenVT();
2951 assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
2952 "Unexpected splat operand type");
2953 MVT EltVT = N.getSimpleValueType().getVectorElementType();
2954 if (EltVT.bitsLT(XLenVT))
2955 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
2956
2957 if (!ValidateImm(SplatImm))
2958 return false;
2959
2960 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
2961 return true;
2962}
2963
2964 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
2965 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
2966 [](int64_t Imm) { return isInt<5>(Imm); });
2967}
2968
2969 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
2970 return selectVSplatSimmHelper(
2971 N, SplatVal, *CurDAG, *Subtarget,
2972 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
2973}
2974
2975 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
2976 SDValue &SplatVal) {
2977 return selectVSplatSimmHelper(
2978 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
2979 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
2980 });
2981}
2982
2983 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
2984 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2985 !isa<ConstantSDNode>(N.getOperand(1)))
2986 return false;
2987
2988 int64_t SplatImm =
2989 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2990
2991 if (!isUInt<5>(SplatImm))
2992 return false;
2993
2994 SplatVal =
2995 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
2996
2997 return true;
2998}
2999
3000 bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3001 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3002 if (!CFP)
3003 return false;
3004 const APFloat &APF = CFP->getValueAPF();
3005 // td can handle +0.0 already.
3006 if (APF.isPosZero())
3007 return false;
3008
3009 MVT VT = CFP->getSimpleValueType(0);
3010
3011 if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
3012 VT) >= 0)
3013 return false;
3014
3015 MVT XLenVT = Subtarget->getXLenVT();
3016 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3017 assert(APF.isNegZero() && "Unexpected constant.");
3018 return false;
3019 }
3020 SDLoc DL(N);
3021 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3022 *Subtarget);
3023 return true;
3024}
3025
3026 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3027 SDValue &Imm) {
3028 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3029 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3030
3031 if (!isInt<5>(ImmVal))
3032 return false;
3033
3034 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3035 return true;
3036 }
3037
3038 return false;
3039}
3040
3041// Try to remove sext.w if the input is a W instruction or can be made into
3042// a W instruction cheaply.
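// e.g. (ADDIW (ADD X, Y), 0) becomes (ADDW X, Y); ADDW already sign-extends
// from bit 31, making the separate sext.w unnecessary.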
3043bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3044 // Look for the sext.w pattern, addiw rd, rs1, 0.
3045 if (N->getMachineOpcode() != RISCV::ADDIW ||
3046 !isNullConstant(N->getOperand(1)))
3047 return false;
3048
3049 SDValue N0 = N->getOperand(0);
3050 if (!N0.isMachineOpcode())
3051 return false;
3052
3053 switch (N0.getMachineOpcode()) {
3054 default:
3055 break;
3056 case RISCV::ADD:
3057 case RISCV::ADDI:
3058 case RISCV::SUB:
3059 case RISCV::MUL:
3060 case RISCV::SLLI: {
3061 // Convert sext.w+add/sub/mul to their W instructions. This will create
3062 // a new independent instruction. This improves latency.
3063 unsigned Opc;
3064 switch (N0.getMachineOpcode()) {
3065 default:
3066 llvm_unreachable("Unexpected opcode!");
3067 case RISCV::ADD: Opc = RISCV::ADDW; break;
3068 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3069 case RISCV::SUB: Opc = RISCV::SUBW; break;
3070 case RISCV::MUL: Opc = RISCV::MULW; break;
3071 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3072 }
3073
3074 SDValue N00 = N0.getOperand(0);
3075 SDValue N01 = N0.getOperand(1);
3076
3077 // Shift amount needs to be uimm5.
3078 if (N0.getMachineOpcode() == RISCV::SLLI &&
3079 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3080 break;
3081
3082 SDNode *Result =
3083 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3084 N00, N01);
3085 ReplaceUses(N, Result);
3086 return true;
3087 }
3088 case RISCV::ADDW:
3089 case RISCV::ADDIW:
3090 case RISCV::SUBW:
3091 case RISCV::MULW:
3092 case RISCV::SLLIW:
3093 case RISCV::PACKW:
3094 case RISCV::TH_MULAW:
3095 case RISCV::TH_MULAH:
3096 case RISCV::TH_MULSW:
3097 case RISCV::TH_MULSH:
3098 // Result is already sign extended; just remove the sext.w.
3099 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3100 ReplaceUses(N, N0.getNode());
3101 return true;
3102 }
3103
3104 return false;
3105}
3106
3107// Return true if we can make sure mask of N is all-ones mask.
3108static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3109 // Check that we're using V0 as a mask register.
3110 if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
3111 cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
3112 return false;
3113
3114 // The glued user defines V0.
3115 const auto *Glued = N->getGluedNode();
3116
3117 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3118 return false;
3119
3120 // Check that we're defining V0 as a mask register.
3121 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3122 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3123 return false;
3124
3125 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3126 SDValue MaskSetter = Glued->getOperand(2);
3127
3128 const auto IsVMSet = [](unsigned Opc) {
3129 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3130 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3131 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3132 Opc == RISCV::PseudoVMSET_M_B8;
3133 };
3134
3135 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3136 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3137 // assume that it's all-ones? Same applies to its VL.
3138 return MaskSetter->isMachineOpcode() &&
3139 IsVMSet(MaskSetter.getMachineOpcode());
3140}
3141
3142// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3143// corresponding "unmasked" pseudo versions. The mask we're interested in will
3144// take the form of a V0 physical register operand, with a glued
3145// register-setting instruction.
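// As a sketch: a PseudoVADD_VV_M1_MASK whose V0 operand is defined by a
// PseudoVMSET_M_* can be rewritten to the plain PseudoVADD_VV_M1 form, or to
// the _TU form when the merge operand is still needed.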
3146bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
3147 const RISCV::RISCVMaskedPseudoInfo *I =
3148 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3149 if (!I)
3150 return false;
3151
3152 unsigned MaskOpIdx = I->MaskOpIdx;
3153
3154 if (!usesAllOnesMask(N, MaskOpIdx))
3155 return false;
3156
3157 // Retrieve the tail policy operand index, if any.
3158 std::optional<unsigned> TailPolicyOpIdx;
3159 const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
3160 const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());
3161
3162 bool UseTUPseudo = false;
3163 if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
3164 // Some operations are their own TU.
3165 if (I->UnmaskedTUPseudo == I->UnmaskedPseudo) {
3166 UseTUPseudo = true;
3167 } else {
3168 TailPolicyOpIdx = getVecPolicyOpIdx(N, MaskedMCID);
3169 if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
3170 RISCVII::TAIL_AGNOSTIC)) {
3171 // We can't use TA if the tie-operand is not IMPLICIT_DEF
3172 if (!N->getOperand(0).isUndef()) {
3173 // Keep the true-masked instruction when there is no unmasked TU
3174 // instruction
3175 if (I->UnmaskedTUPseudo == I->MaskedPseudo)
3176 return false;
3177 UseTUPseudo = true;
3178 }
3179 }
3180 }
3181 }
3182
3183 unsigned Opc = UseTUPseudo ? I->UnmaskedTUPseudo : I->UnmaskedPseudo;
3184
3185 // Check that we're dropping the mask operand and any policy operand
3186 // when we transform to this unmasked pseudo. Additionally, if this
3187 // instruction is tail agnostic, the unmasked instruction should not have a
3188 // merge op.
3189 uint64_t TSFlags = TII.get(Opc).TSFlags;
3190 assert((UseTUPseudo == RISCVII::hasMergeOp(TSFlags)) &&
3192 "Unexpected pseudo to transform to");
3193 (void)TSFlags;
3194
3196 // Skip the merge operand at index 0 if !UseTUPseudo.
3197 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3198 // Skip the mask, the policy, and the Glue.
3199 SDValue Op = N->getOperand(I);
3200 if (I == MaskOpIdx || I == TailPolicyOpIdx ||
3201 Op.getValueType() == MVT::Glue)
3202 continue;
3203 Ops.push_back(Op);
3204 }
3205
3206 // Transitively apply any node glued to our new node.
3207 const auto *Glued = N->getGluedNode();
3208 if (auto *TGlued = Glued->getGluedNode())
3209 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3210
3211 SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3212 Result->setFlags(N->getFlags());
3213 ReplaceUses(N, Result);
3214
3215 return true;
3216}
3217
3218// Try to fold away VMERGE_VVM instructions. We handle these cases:
3219 // -Masked TU VMERGE_VVM combined with an unmasked TA instruction folds to a
3220 // masked TU instruction. VMERGE_VVM must have its merge operand equal to its
3221 // false operand.
3222 // -Masked TA VMERGE_VVM combined with an unmasked TA instruction folds to a
3223 // masked TA instruction.
3224 // -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to a
3225 // masked TU instruction. Both instructions must have the same merge operand.
3226 // VMERGE_VVM must have its merge operand equal to its false operand.
3227bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
3228 unsigned Offset = IsTA ? 0 : 1;
3229 uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
3230
3231 SDValue False = N->getOperand(0 + Offset);
3232 SDValue True = N->getOperand(1 + Offset);
3233 SDValue Mask = N->getOperand(2 + Offset);
3234 SDValue VL = N->getOperand(3 + Offset);
3235
3236 assert(True.getResNo() == 0 &&
3237 "Expect True is the first output of an instruction.");
3238
3239 // N must be the only user of True.
3240 if (!True.hasOneUse())
3241 return false;
3242
3243 if (!True.isMachineOpcode())
3244 return false;
3245
3246 unsigned TrueOpc = True.getMachineOpcode();
3247
3248 // Skip if True has merge operand.
3249 uint64_t TrueTSFlags = TII->get(TrueOpc).TSFlags;
3250 bool HasMergeOp = RISCVII::hasMergeOp(TrueTSFlags);
3251
3252 bool IsMasked = false;
3253 const RISCV::RISCVMaskedPseudoInfo *Info =
3254 RISCV::lookupMaskedIntrinsicByUnmaskedTA(TrueOpc);
3255 if (!Info && HasMergeOp) {
3256 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3257 IsMasked = true;
3258 }
3259
3260 if (!Info)
3261 return false;
3262
3263 if (HasMergeOp) {
3264 // The vmerge instruction must be TU.
3265 // FIXME: This could be relaxed, but we need to handle the policy for the
3266 // resulting op correctly.
3267 if (IsTA)
3268 return false;
3269 SDValue MergeOpTrue = True->getOperand(0);
3270 // Both the vmerge instruction and the True instruction must have the same
3271 // merge operand.
3272 if (False != MergeOpTrue)
3273 return false;
3274 }
3275
3276 if (IsMasked) {
3277 assert(HasMergeOp && "Expected merge op");
3278 // The vmerge instruction must be TU.
3279 if (IsTA)
3280 return false;
3281 // The vmerge instruction must have an all 1s mask since we're going to keep
3282 // the mask from the True instruction.
3283 // FIXME: Support mask agnostic True instruction which would have an
3284 // undef merge operand.
3285 if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
3286 return false;
3287 }
3288
3289 // Skip if True has side effect.
3290 // TODO: Support vleff and vlsegff.
3291 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3292 return false;
3293
3294 // The last operand of a masked instruction may be glued.
3295 bool HasGlueOp = True->getGluedNode() != nullptr;
3296
3297 // The chain operand may exist either before the glued operands or in the last
3298 // position.
3299 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3300 bool HasChainOp =
3301 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3302
3303 if (HasChainOp) {
3304 // Avoid creating cycles in the DAG. We must ensure that none of the other
3305 // operands depend on True through its chain.
3306 SmallVector<const SDNode *, 4> LoopWorklist;
3307 SmallPtrSet<const SDNode *, 16> Visited;
3308 LoopWorklist.push_back(False.getNode());
3309 LoopWorklist.push_back(Mask.getNode());
3310 LoopWorklist.push_back(VL.getNode());
3311 if (SDNode *Glued = N->getGluedNode())
3312 LoopWorklist.push_back(Glued);
3313 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3314 return false;
3315 }
3316
3317 // The vector policy operand may be present for masked intrinsics
3318 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3319 unsigned TrueVLIndex =
3320 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3321 SDValue TrueVL = True.getOperand(TrueVLIndex);
3322
3323 auto IsNoFPExcept = [this](SDValue N) {
3324 return !this->mayRaiseFPException(N.getNode()) ||
3325 N->getFlags().hasNoFPExcept();
3326 };
3327
3328 // Allow the peephole when True cannot raise FP exceptions and uses the VLMAX
3329 // vector length, since every lane of N past its VL comes from the merge
3330 // operand anyway. VLMAX should have been lowered to (XLenVT -1).
3331 if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL)))
3332 return false;
3333
3334 SDLoc DL(N);
3335 unsigned MaskedOpc = Info->MaskedPseudo;
3337 "Expected instructions with mask have policy operand.");
3338 assert(RISCVII::hasMergeOp(TII->get(MaskedOpc).TSFlags) &&
3339 "Expected instructions with mask have merge operand.");
3340
3341 SmallVector<SDValue, 8> Ops;
3342 if (IsMasked) {
3343 Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
3344 Ops.append({VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
3345 Ops.push_back(
3346 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()));
3347 Ops.append(True->op_begin() + TrueVLIndex + 3, True->op_end());
3348 } else {
3349 if (!HasMergeOp)
3350 Ops.push_back(False);
3351 Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
3352 Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
3353 Ops.push_back(
3354 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()));
3355
3356 // Result node should have chain operand of True.
3357 if (HasChainOp)
3358 Ops.push_back(True.getOperand(TrueChainOpIdx));
3359
3360 if (N->getGluedNode())
3361 Ops.push_back(N->getOperand(N->getNumOperands() - 1));
3362 }
3363
3364 SDNode *Result =
3365 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3366 Result->setFlags(True->getFlags());
3367
3368 // Replace the vmerge.vvm node with Result.
3369 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3370
3371 // Replace the other values of True, e.g. its chain and VL.
3372 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3373 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3374
3375 // Try to transform Result to unmasked intrinsic.
3376 doPeepholeMaskedRVV(Result);
3377 return true;
3378}
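// Illustrative sketch (editorial addition, not from the LLVM source): the fold
// above rewrites a pattern roughly like
//   %true = PseudoVADD_VV_M1 undef, %a, %b, %vl, sew
//   %res  = PseudoVMERGE_VVM_M1 %false, %true, %mask, %vl, sew
// into the single masked pseudo
//   %res  = PseudoVADD_VV_M1_MASK %false, %a, %b, %mask, %vl, sew, policy
// assuming the merge, mask, VL, and side-effect checks above all pass; the
// exact operand orders are defined in RISCVInstrInfoVPseudos.td.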
3379
3380// Transform (VMERGE_VVM_<LMUL>_TU false, false, true, allones, vl, sew) to
3381// (VADD_VI_<LMUL>_TU false, true, 0, vl, sew). It may decrease uses of VMSET.
3382bool RISCVDAGToDAGISel::performVMergeToVAdd(SDNode *N) {
3383 unsigned NewOpc;
3384 switch (N->getMachineOpcode()) {
3385 default:
3386 llvm_unreachable("Expected VMERGE_VVM_<LMUL>_TU instruction.");
3387 case RISCV::PseudoVMERGE_VVM_MF8_TU:
3388 NewOpc = RISCV::PseudoVADD_VI_MF8_TU;
3389 break;
3390 case RISCV::PseudoVMERGE_VVM_MF4_TU:
3391 NewOpc = RISCV::PseudoVADD_VI_MF4_TU;
3392 break;
3393 case RISCV::PseudoVMERGE_VVM_MF2_TU:
3394 NewOpc = RISCV::PseudoVADD_VI_MF2_TU;
3395 break;
3396 case RISCV::PseudoVMERGE_VVM_M1_TU:
3397 NewOpc = RISCV::PseudoVADD_VI_M1_TU;
3398 break;
3399 case RISCV::PseudoVMERGE_VVM_M2_TU:
3400 NewOpc = RISCV::PseudoVADD_VI_M2_TU;
3401 break;
3402 case RISCV::PseudoVMERGE_VVM_M4_TU:
3403 NewOpc = RISCV::PseudoVADD_VI_M4_TU;
3404 break;
3405 case RISCV::PseudoVMERGE_VVM_M8_TU:
3406 NewOpc = RISCV::PseudoVADD_VI_M8_TU;
3407 break;
3408 }
3409
3410 if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
3411 return false;
3412
3413 SDLoc DL(N);
3414 EVT VT = N->getValueType(0);
3415 SDValue Ops[] = {N->getOperand(1), N->getOperand(2),
3416 CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT()),
3417 N->getOperand(4), N->getOperand(5)};
3418 SDNode *Result = CurDAG->getMachineNode(NewOpc, DL, VT, Ops);
3419 ReplaceUses(N, Result);
3420 return true;
3421}
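// Illustrative sketch (editorial addition, not from the LLVM source): with an
// all-ones mask every body element of the vmerge selects its "true" operand,
// so a node such as
//   %res = PseudoVMERGE_VVM_M1_TU %passthru, %passthru, %src, %allones, %vl, sew
// can be re-encoded as the tail-undisturbed add-immediate
//   %res = PseudoVADD_VI_M1_TU %passthru, %src, 0, %vl, sew
// which writes %src + 0 to the body and keeps %passthru in the tail, letting
// the vmset.m that produced %allones become dead.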
3422
3423bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3424 bool MadeChange = false;
3425 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3426
3427 while (Position != CurDAG->allnodes_begin()) {
3428 SDNode *N = &*--Position;
3429 if (N->use_empty() || !N->isMachineOpcode())
3430 continue;
3431
3432 auto IsVMergeTU = [](unsigned Opcode) {
3433 return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU ||
3434 Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU ||
3435 Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU ||
3436 Opcode == RISCV::PseudoVMERGE_VVM_M1_TU ||
3437 Opcode == RISCV::PseudoVMERGE_VVM_M2_TU ||
3438 Opcode == RISCV::PseudoVMERGE_VVM_M4_TU ||
3439 Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
3440 };
3441
3442 auto IsVMergeTA = [](unsigned Opcode) {
3443 return Opcode == RISCV::PseudoVMERGE_VVM_MF8 ||
3444 Opcode == RISCV::PseudoVMERGE_VVM_MF4 ||
3445 Opcode == RISCV::PseudoVMERGE_VVM_MF2 ||
3446 Opcode == RISCV::PseudoVMERGE_VVM_M1 ||
3447 Opcode == RISCV::PseudoVMERGE_VVM_M2 ||
3448 Opcode == RISCV::PseudoVMERGE_VVM_M4 ||
3449 Opcode == RISCV::PseudoVMERGE_VVM_M8;
3450 };
3451
3452 unsigned Opc = N->getMachineOpcode();
3453 // The following optimizations require that the merge operand of N is the same
3454 // as the false operand of N.
3455 if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) ||
3456 IsVMergeTA(Opc))
3457 MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc));
3458 if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1))
3459 MadeChange |= performVMergeToVAdd(N);
3460 }
3461 return MadeChange;
3462}
3463
3464// This pass converts a legalized DAG into a RISCV-specific DAG, ready
3465// for instruction scheduling.
3466 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3467 CodeGenOpt::Level OptLevel) {
3468 return new RISCVDAGToDAGISel(TM, OptLevel);
3469}
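// Usage sketch (editorial addition; the real call site lives in
// RISCVTargetMachine.cpp and may differ in detail): the pass is typically
// created from the target's pass configuration, roughly
//   bool RISCVPassConfig::addInstSelector() {
//     addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel()));
//     return false;
//   }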
3470
3471char RISCVDAGToDAGISel::ID = 0;
3472
3473 INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)