1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMCTargetDesc.h"
16#include "MCTargetDesc/RISCVMatInt.h"
17#include "RISCVISelLowering.h"
18#include "RISCVMachineFunctionInfo.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
21#include "llvm/Support/Alignment.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/MathExtras.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
31static cl::opt<bool> UsePseudoMovImm(
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#define GET_RISCVMaskedPseudosTable_IMPL
47#include "RISCVGenSearchableTables.inc"
48} // namespace llvm::RISCV
49
50void RISCVDAGToDAGISel::PreprocessISelDAG() {
51 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
52
53 bool MadeChange = false;
54 while (Position != CurDAG->allnodes_begin()) {
55 SDNode *N = &*--Position;
56 if (N->use_empty())
57 continue;
58
59 SDValue Result;
60 switch (N->getOpcode()) {
61 case ISD::SPLAT_VECTOR: {
62 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
63 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
64 MVT VT = N->getSimpleValueType(0);
65 unsigned Opc =
66 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
67 SDLoc DL(N);
68 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
69 SDValue Src = N->getOperand(0);
70 if (VT.isInteger())
71 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
72 N->getOperand(0));
73 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
74 break;
75 }
76 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
77 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
78 // load. Done after lowering and combining so that we have a chance to
79 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
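 // Illustrative note (not in the original source): for a splat of the i64
 // value Hi:Lo on RV32, the expansion is roughly
 //   sw Lo, 0(slot); sw Hi, 4(slot); vlse64.v vd, (slot), zero
 // i.e. two 32-bit stores to a stack slot followed by a stride-0 vector load.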
80 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
81 MVT VT = N->getSimpleValueType(0);
82 SDValue Passthru = N->getOperand(0);
83 SDValue Lo = N->getOperand(1);
84 SDValue Hi = N->getOperand(2);
85 SDValue VL = N->getOperand(3);
86 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
87 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
88 "Unexpected VTs!");
89 MachineFunction &MF = CurDAG->getMachineFunction();
90 SDLoc DL(N);
91
92 // Create temporary stack for each expanding node.
93 SDValue StackSlot =
94 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
95 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
96 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
97
98 SDValue Chain = CurDAG->getEntryNode();
99 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
100
101 SDValue OffsetSlot =
102 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
103 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
104 Align(8));
105
106 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
107
108 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
109 SDValue IntID =
110 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
111 SDValue Ops[] = {Chain,
112 IntID,
113 Passthru,
114 StackSlot,
115 CurDAG->getRegister(RISCV::X0, MVT::i64),
116 VL};
117
118 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
119 MVT::i64, MPI, Align(8),
120 MachineMemOperand::MOLoad);
121 break;
122 }
123 }
124
125 if (Result) {
126 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
127 LLVM_DEBUG(N->dump(CurDAG));
128 LLVM_DEBUG(dbgs() << "\nNew: ");
129 LLVM_DEBUG(Result->dump(CurDAG));
130 LLVM_DEBUG(dbgs() << "\n");
131
132 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
133 MadeChange = true;
134 }
135 }
136
137 if (MadeChange)
138 CurDAG->RemoveDeadNodes();
139}
140
141void RISCVDAGToDAGISel::PostprocessISelDAG() {
142 HandleSDNode Dummy(CurDAG->getRoot());
143 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
144
145 bool MadeChange = false;
146 while (Position != CurDAG->allnodes_begin()) {
147 SDNode *N = &*--Position;
148 // Skip dead nodes and any non-machine opcodes.
149 if (N->use_empty() || !N->isMachineOpcode())
150 continue;
151
152 MadeChange |= doPeepholeSExtW(N);
153
154 // FIXME: This is here only because the VMerge transform doesn't
155 // know how to handle masked true inputs. Once that has been moved
156 // to post-ISEL, this can be deleted as well.
157 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
158 }
159
160 CurDAG->setRoot(Dummy.getValue());
161
162 MadeChange |= doPeepholeMergeVVMFold();
163
164 // After we're done with everything else, convert IMPLICIT_DEF
165 // passthru operands to NoRegister. This is required to workaround
166 // an optimization deficiency in MachineCSE. This really should
167 // be merged back into each of the patterns (i.e. there's no good
168 // reason not to go directly to NoReg), but is being done this way
169 // to allow easy backporting.
170 MadeChange |= doPeepholeNoRegPassThru();
171
172 if (MadeChange)
173 CurDAG->RemoveDeadNodes();
174}
175
176static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
177 RISCVMatInt::InstSeq &Seq) {
178 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
179 for (const RISCVMatInt::Inst &Inst : Seq) {
180 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
181 SDNode *Result = nullptr;
182 switch (Inst.getOpndKind()) {
183 case RISCVMatInt::Imm:
184 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
185 break;
186 case RISCVMatInt::RegX0:
187 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
188 CurDAG->getRegister(RISCV::X0, VT));
189 break;
190 case RISCVMatInt::RegReg:
191 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
192 break;
193 case RISCVMatInt::RegImm:
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
195 break;
196 }
197
198 // Only the first instruction has X0 as its source.
199 SrcReg = SDValue(Result, 0);
200 }
201
202 return SrcReg;
203}
204
205static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
206 int64_t Imm, const RISCVSubtarget &Subtarget) {
207 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
208
209 // Use a rematerializable pseudo instruction for short sequences if enabled.
210 if (Seq.size() == 2 && UsePseudoMovImm)
211 return SDValue(
212 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
213 CurDAG->getTargetConstant(Imm, DL, VT)),
214 0);
215
216 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
217 // worst an LUI+ADDIW. This will require an extra register, but avoids a
218 // constant pool.
219 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
220 // the low and high 32 bits are the same and bits 31 and 63 are set.
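 // Illustrative note (not in the original source): a constant such as
 // 0x8765432187654321, whose low and high 32 bits match and have bit 31 set,
 // can be built roughly as lui/addiw of 0x87654321 followed by
 //   slli t0, x, 32 ; add.uw x, x, t0   (with Zba)
 // instead of a longer shift-and-add chain or a constant pool load.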
221 if (Seq.size() > 3) {
222 unsigned ShiftAmt, AddOpc;
223 RISCVMatInt::InstSeq SeqLo =
224 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
225 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
226 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
227
228 SDValue SLLI = SDValue(
229 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
230 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
231 0);
232 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
233 }
234 }
235
236 // Otherwise, use the original sequence.
237 return selectImmSeq(CurDAG, DL, VT, Seq);
238}
239
240static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
241 unsigned NF, RISCVII::VLMUL LMUL) {
242 static const unsigned M1TupleRegClassIDs[] = {
243 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
244 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
245 RISCV::VRN8M1RegClassID};
246 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
247 RISCV::VRN3M2RegClassID,
248 RISCV::VRN4M2RegClassID};
249
250 assert(Regs.size() >= 2 && Regs.size() <= 8);
251
252 unsigned RegClassID;
253 unsigned SubReg0;
254 switch (LMUL) {
255 default:
256 llvm_unreachable("Invalid LMUL.");
257 case RISCVII::VLMUL::LMUL_F8:
258 case RISCVII::VLMUL::LMUL_F4:
259 case RISCVII::VLMUL::LMUL_F2:
260 case RISCVII::VLMUL::LMUL_1:
261 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
262 "Unexpected subreg numbering");
263 SubReg0 = RISCV::sub_vrm1_0;
264 RegClassID = M1TupleRegClassIDs[NF - 2];
265 break;
266 case RISCVII::VLMUL::LMUL_2:
267 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
268 "Unexpected subreg numbering");
269 SubReg0 = RISCV::sub_vrm2_0;
270 RegClassID = M2TupleRegClassIDs[NF - 2];
271 break;
272 case RISCVII::VLMUL::LMUL_4:
273 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
274 "Unexpected subreg numbering");
275 SubReg0 = RISCV::sub_vrm4_0;
276 RegClassID = RISCV::VRN2M4RegClassID;
277 break;
278 }
279
280 SDLoc DL(Regs[0]);
281 SmallVector<SDValue, 8> Ops;
282
283 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
284
285 for (unsigned I = 0; I < Regs.size(); ++I) {
286 Ops.push_back(Regs[I]);
287 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
288 }
289 SDNode *N =
290 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
291 return SDValue(N, 0);
292}
293
294void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
295 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
296 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
297 bool IsLoad, MVT *IndexVT) {
298 SDValue Chain = Node->getOperand(0);
299 SDValue Glue;
300
301 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
302
303 if (IsStridedOrIndexed) {
304 Operands.push_back(Node->getOperand(CurOp++)); // Index.
305 if (IndexVT)
306 *IndexVT = Operands.back()->getSimpleValueType(0);
307 }
308
309 if (IsMasked) {
310 // Mask needs to be copied to V0.
311 SDValue Mask = Node->getOperand(CurOp++);
312 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
313 Glue = Chain.getValue(1);
314 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
315 }
316 SDValue VL;
317 selectVLOp(Node->getOperand(CurOp++), VL);
318 Operands.push_back(VL);
319
320 MVT XLenVT = Subtarget->getXLenVT();
321 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
322 Operands.push_back(SEWOp);
323
324 // At the IR layer, all the masked load intrinsics have policy operands,
325 // none of the others do. All have passthru operands. For our pseudos,
326 // all loads have policy operands.
327 if (IsLoad) {
328 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
329 if (IsMasked)
330 Policy = Node->getConstantOperandVal(CurOp++);
331 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
332 Operands.push_back(PolicyOp);
333 }
334
335 Operands.push_back(Chain); // Chain.
336 if (Glue)
337 Operands.push_back(Glue);
338}
339
340void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
341 bool IsStrided) {
342 SDLoc DL(Node);
343 unsigned NF = Node->getNumValues() - 1;
344 MVT VT = Node->getSimpleValueType(0);
345 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
346 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
347
348 unsigned CurOp = 2;
349 SmallVector<SDValue, 8> Operands;
350
351 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
352 Node->op_begin() + CurOp + NF);
353 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
354 Operands.push_back(Merge);
355 CurOp += NF;
356
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
358 Operands, /*IsLoad=*/true);
359
360 const RISCV::VLSEGPseudo *P =
361 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
362 static_cast<unsigned>(LMUL));
363 MachineSDNode *Load =
364 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
365
366 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
367 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
368
369 SDValue SuperReg = SDValue(Load, 0);
370 for (unsigned I = 0; I < NF; ++I) {
371 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
372 ReplaceUses(SDValue(Node, I),
373 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
374 }
375
376 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
377 CurDAG->RemoveDeadNode(Node);
378}
379
380void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
381 SDLoc DL(Node);
382 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
383 MVT VT = Node->getSimpleValueType(0);
384 MVT XLenVT = Subtarget->getXLenVT();
385 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
386 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
387
388 unsigned CurOp = 2;
389 SmallVector<SDValue, 8> Operands;
390
391 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
392 Node->op_begin() + CurOp + NF);
393 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
394 Operands.push_back(MaskedOff);
395 CurOp += NF;
396
397 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
398 /*IsStridedOrIndexed*/ false, Operands,
399 /*IsLoad=*/true);
400
401 const RISCV::VLSEGPseudo *P =
402 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
403 Log2SEW, static_cast<unsigned>(LMUL));
404 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
405 XLenVT, MVT::Other, Operands);
406
407 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
408 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
409
410 SDValue SuperReg = SDValue(Load, 0);
411 for (unsigned I = 0; I < NF; ++I) {
412 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
413 ReplaceUses(SDValue(Node, I),
414 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
415 }
416
417 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
418 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
419 CurDAG->RemoveDeadNode(Node);
420}
421
422void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
423 bool IsOrdered) {
424 SDLoc DL(Node);
425 unsigned NF = Node->getNumValues() - 1;
426 MVT VT = Node->getSimpleValueType(0);
427 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
428 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
429
430 unsigned CurOp = 2;
431 SmallVector<SDValue, 8> Operands;
432
433 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
434 Node->op_begin() + CurOp + NF);
435 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
436 Operands.push_back(MaskedOff);
437 CurOp += NF;
438
439 MVT IndexVT;
440 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
441 /*IsStridedOrIndexed*/ true, Operands,
442 /*IsLoad=*/true, &IndexVT);
443
444 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
445 "Element count mismatch");
446
447 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
448 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
449 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
450 report_fatal_error("The V extension does not support EEW=64 for index "
451 "values when XLEN=32");
452 }
453 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
454 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
455 static_cast<unsigned>(IndexLMUL));
456 MachineSDNode *Load =
457 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
458
459 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
460 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
461
462 SDValue SuperReg = SDValue(Load, 0);
463 for (unsigned I = 0; I < NF; ++I) {
464 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
465 ReplaceUses(SDValue(Node, I),
466 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
467 }
468
469 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
470 CurDAG->RemoveDeadNode(Node);
471}
472
473void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
474 bool IsStrided) {
475 SDLoc DL(Node);
476 unsigned NF = Node->getNumOperands() - 4;
477 if (IsStrided)
478 NF--;
479 if (IsMasked)
480 NF--;
481 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
482 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
483 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
484 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
485 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
486
487 SmallVector<SDValue, 8> Operands;
488 Operands.push_back(StoreVal);
489 unsigned CurOp = 2 + NF;
490
491 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
492 Operands);
493
494 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
495 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
496 MachineSDNode *Store =
497 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
498
499 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
500 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
501
502 ReplaceNode(Node, Store);
503}
504
505void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
506 bool IsOrdered) {
507 SDLoc DL(Node);
508 unsigned NF = Node->getNumOperands() - 5;
509 if (IsMasked)
510 --NF;
511 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
512 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
513 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
514 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
515 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
516
517 SmallVector<SDValue, 8> Operands;
518 Operands.push_back(StoreVal);
519 unsigned CurOp = 2 + NF;
520
521 MVT IndexVT;
522 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
523 /*IsStridedOrIndexed*/ true, Operands,
524 /*IsLoad=*/false, &IndexVT);
525
526 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
527 "Element count mismatch");
528
529 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
530 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
531 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
532 report_fatal_error("The V extension does not support EEW=64 for index "
533 "values when XLEN=32");
534 }
535 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
536 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
537 static_cast<unsigned>(IndexLMUL));
538 MachineSDNode *Store =
539 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
540
541 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
542 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
543
544 ReplaceNode(Node, Store);
545}
546
547void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
548 if (!Subtarget->hasVInstructions())
549 return;
550
551 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
552
553 SDLoc DL(Node);
554 MVT XLenVT = Subtarget->getXLenVT();
555
556 unsigned IntNo = Node->getConstantOperandVal(0);
557
558 assert((IntNo == Intrinsic::riscv_vsetvli ||
559 IntNo == Intrinsic::riscv_vsetvlimax) &&
560 "Unexpected vsetvli intrinsic");
561
562 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
563 unsigned Offset = (VLMax ? 1 : 2);
564
565 assert(Node->getNumOperands() == Offset + 2 &&
566 "Unexpected number of operands");
567
568 unsigned SEW =
569 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
570 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
571 Node->getConstantOperandVal(Offset + 1) & 0x7);
572
573 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
574 /*MaskAgnostic*/ true);
575 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
576
577 SDValue VLOperand;
578 unsigned Opcode = RISCV::PseudoVSETVLI;
579 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
580 if (auto VLEN = Subtarget->getRealVLen())
581 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
582 VLMax = true;
583 }
584 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
585 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
586 Opcode = RISCV::PseudoVSETVLIX0;
587 } else {
588 VLOperand = Node->getOperand(1);
589
590 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
591 uint64_t AVL = C->getZExtValue();
592 if (isUInt<5>(AVL)) {
593 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
594 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
595 XLenVT, VLImm, VTypeIOp));
596 return;
597 }
598 }
599 }
600
601 ReplaceNode(Node,
602 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
603}
604
605bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
606 MVT VT = Node->getSimpleValueType(0);
607 unsigned Opcode = Node->getOpcode();
608 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
609 "Unexpected opcode");
610 SDLoc DL(Node);
611
612 // For operations of the form (x << C1) op C2, check if we can use
613 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
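 // Illustrative example (not in the original source):
 //   (or (shl X, 8), 0x4100)  -->  (shl (ori X, 0x41), 8)
 // 0x4100 does not fit in a simm12, but 0x4100 >> 8 == 0x41 does, and the
 // low 8 bits of the immediate are zero, so the OR result is unchanged.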
614 SDValue N0 = Node->getOperand(0);
615 SDValue N1 = Node->getOperand(1);
616
617 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
618 if (!Cst)
619 return false;
620
621 int64_t Val = Cst->getSExtValue();
622
623 // Check if immediate can already use ANDI/ORI/XORI.
624 if (isInt<12>(Val))
625 return false;
626
627 SDValue Shift = N0;
628
629 // If Val is simm32 and we have a sext_inreg from i32, then the binop
630 // produces at least 33 sign bits. We can peek through the sext_inreg and use
631 // a SLLIW at the end.
632 bool SignExt = false;
633 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
634 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
635 SignExt = true;
636 Shift = N0.getOperand(0);
637 }
638
639 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
640 return false;
641
642 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
643 if (!ShlCst)
644 return false;
645
646 uint64_t ShAmt = ShlCst->getZExtValue();
647
648 // Make sure that we don't change the operation by removing bits.
649 // This only matters for OR and XOR, AND is unaffected.
650 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
651 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
652 return false;
653
654 int64_t ShiftedVal = Val >> ShAmt;
655 if (!isInt<12>(ShiftedVal))
656 return false;
657
658 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
659 if (SignExt && ShAmt >= 32)
660 return false;
661
662 // Ok, we can reorder to get a smaller immediate.
663 unsigned BinOpc;
664 switch (Opcode) {
665 default: llvm_unreachable("Unexpected opcode");
666 case ISD::AND: BinOpc = RISCV::ANDI; break;
667 case ISD::OR: BinOpc = RISCV::ORI; break;
668 case ISD::XOR: BinOpc = RISCV::XORI; break;
669 }
670
671 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
672
673 SDNode *BinOp =
674 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
675 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
676 SDNode *SLLI =
677 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
678 CurDAG->getTargetConstant(ShAmt, DL, VT));
679 ReplaceNode(Node, SLLI);
680 return true;
681}
682
683bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
684 // Only supported with XTHeadBb at the moment.
685 if (!Subtarget->hasVendorXTHeadBb())
686 return false;
687
688 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
689 if (!N1C)
690 return false;
691
692 SDValue N0 = Node->getOperand(0);
693 if (!N0.hasOneUse())
694 return false;
695
696 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
697 MVT VT) {
698 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
699 CurDAG->getTargetConstant(Msb, DL, VT),
700 CurDAG->getTargetConstant(Lsb, DL, VT));
701 };
702
703 SDLoc DL(Node);
704 MVT VT = Node->getSimpleValueType(0);
705 const unsigned RightShAmt = N1C->getZExtValue();
706
707 // Transform (sra (shl X, C1), C2) with C1 <= C2
708 // -> (TH.EXT X, msb, lsb)
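 // Illustrative example (not in the original source): with a 32-bit type,
 // (sra (shl X, 24), 28) becomes th.ext X, 7, 4, a sign-extending extract of
 // bits [7:4] (msb = 32 - 24 - 1 = 7, lsb = 28 - 24 = 4).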
709 if (N0.getOpcode() == ISD::SHL) {
710 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
711 if (!N01C)
712 return false;
713
714 const unsigned LeftShAmt = N01C->getZExtValue();
715 // Make sure that this is a bitfield extraction (i.e., the shift-right
716 // amount can not be less than the left-shift).
717 if (LeftShAmt > RightShAmt)
718 return false;
719
720 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
721 const unsigned Msb = MsbPlusOne - 1;
722 const unsigned Lsb = RightShAmt - LeftShAmt;
723
724 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
725 ReplaceNode(Node, TH_EXT);
726 return true;
727 }
728
729 // Transform (sra (sext_inreg X, _), C) ->
730 // (TH.EXT X, msb, lsb)
731 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
732 unsigned ExtSize =
733 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
734
735 // ExtSize of 32 should use sraiw via tablegen pattern.
736 if (ExtSize == 32)
737 return false;
738
739 const unsigned Msb = ExtSize - 1;
740 const unsigned Lsb = RightShAmt;
741
742 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
743 ReplaceNode(Node, TH_EXT);
744 return true;
745 }
746
747 return false;
748}
749
750bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
751 // Target does not support indexed loads.
752 if (!Subtarget->hasVendorXTHeadMemIdx())
753 return false;
754
755 LoadSDNode *Ld = cast<LoadSDNode>(Node);
756 ISD::MemIndexedMode AM = Ld->getAddressingMode();
757 if (AM == ISD::UNINDEXED)
758 return false;
759
760 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
761 if (!C)
762 return false;
763
764 EVT LoadVT = Ld->getMemoryVT();
765 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
766 "Unexpected addressing mode");
767 bool IsPre = AM == ISD::PRE_INC;
768 bool IsPost = AM == ISD::POST_INC;
769 int64_t Offset = C->getSExtValue();
770
771 // The constants that can be encoded in the THeadMemIdx instructions
772 // are of the form (sign_extend(imm5) << imm2).
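 // Illustrative note (not in the original source): an offset of 40 is
 // encodable as imm5=10, imm2=2 (10 << 2 == 40; the loop below stops at the
 // first fitting shift), while 33 is rejected because no shift in 0..3 leaves
 // a signed 5-bit value with a zero remainder.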
773 int64_t Shift;
774 for (Shift = 0; Shift < 4; Shift++)
775 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
776 break;
777
778 // Constant cannot be encoded.
779 if (Shift == 4)
780 return false;
781
782 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
783 unsigned Opcode;
784 if (LoadVT == MVT::i8 && IsPre)
785 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
786 else if (LoadVT == MVT::i8 && IsPost)
787 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
788 else if (LoadVT == MVT::i16 && IsPre)
789 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
790 else if (LoadVT == MVT::i16 && IsPost)
791 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
792 else if (LoadVT == MVT::i32 && IsPre)
793 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
794 else if (LoadVT == MVT::i32 && IsPost)
795 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
796 else if (LoadVT == MVT::i64 && IsPre)
797 Opcode = RISCV::TH_LDIB;
798 else if (LoadVT == MVT::i64 && IsPost)
799 Opcode = RISCV::TH_LDIA;
800 else
801 return false;
802
803 EVT Ty = Ld->getOffset().getValueType();
804 SDValue Ops[] = {Ld->getBasePtr(),
805 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
806 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
807 Ld->getChain()};
808 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
809 Ld->getValueType(1), MVT::Other, Ops);
810
811 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
812 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
813
814 ReplaceNode(Node, New);
815
816 return true;
817}
818
819void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
820 if (!Subtarget->hasVInstructions())
821 return;
822
823 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
824
825 SDLoc DL(Node);
826 unsigned IntNo = Node->getConstantOperandVal(1);
827
828 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
829 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
830 "Unexpected vsetvli intrinsic");
831
832 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
833 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
834 SDValue SEWOp =
835 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
836 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
837 Node->getOperand(4), Node->getOperand(5),
838 Node->getOperand(8), SEWOp,
839 Node->getOperand(0)};
840
841 unsigned Opcode;
842 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
843 switch (LMulSDNode->getSExtValue()) {
844 case 5:
845 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
846 : RISCV::PseudoVC_I_SE_MF8;
847 break;
848 case 6:
849 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
850 : RISCV::PseudoVC_I_SE_MF4;
851 break;
852 case 7:
853 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
854 : RISCV::PseudoVC_I_SE_MF2;
855 break;
856 case 0:
857 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
858 : RISCV::PseudoVC_I_SE_M1;
859 break;
860 case 1:
861 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
862 : RISCV::PseudoVC_I_SE_M2;
863 break;
864 case 2:
865 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
866 : RISCV::PseudoVC_I_SE_M4;
867 break;
868 case 3:
869 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
870 : RISCV::PseudoVC_I_SE_M8;
871 break;
872 }
873
874 ReplaceNode(Node, CurDAG->getMachineNode(
875 Opcode, DL, Node->getSimpleValueType(0), Operands));
876}
877
878void RISCVDAGToDAGISel::Select(SDNode *Node) {
879 // If we have a custom node, we have already selected.
880 if (Node->isMachineOpcode()) {
881 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
882 Node->setNodeId(-1);
883 return;
884 }
885
886 // Instruction Selection not handled by the auto-generated tablegen selection
887 // should be handled here.
888 unsigned Opcode = Node->getOpcode();
889 MVT XLenVT = Subtarget->getXLenVT();
890 SDLoc DL(Node);
891 MVT VT = Node->getSimpleValueType(0);
892
893 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
894
895 switch (Opcode) {
896 case ISD::Constant: {
897 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
898 auto *ConstNode = cast<ConstantSDNode>(Node);
899 if (ConstNode->isZero()) {
900 SDValue New =
901 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
902 ReplaceNode(Node, New.getNode());
903 return;
904 }
905 int64_t Imm = ConstNode->getSExtValue();
906 // If the upper XLen-16 bits are not used, try to convert this to a simm12
907 // by sign extending bit 15.
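 // Illustrative example (not in the original source): Imm = 0xFFF0 with only
 // halfword users becomes SignExtend64<16>(0xFFF0) = -16, which needs a
 // single "addi rd, x0, -16" instead of an LUI/ADDI pair.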
908 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
909 hasAllHUsers(Node))
910 Imm = SignExtend64<16>(Imm);
911 // If the upper 32-bits are not used try to convert this into a simm32 by
912 // sign extending bit 32.
913 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
914 Imm = SignExtend64<32>(Imm);
915
916 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
917 return;
918 }
919 case ISD::ConstantFP: {
920 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
921 auto [FPImm, NeedsFNeg] =
922 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
923 VT);
924 if (FPImm >= 0) {
925 unsigned Opc;
926 unsigned FNegOpc;
927 switch (VT.SimpleTy) {
928 default:
929 llvm_unreachable("Unexpected size");
930 case MVT::f16:
931 Opc = RISCV::FLI_H;
932 FNegOpc = RISCV::FSGNJN_H;
933 break;
934 case MVT::f32:
935 Opc = RISCV::FLI_S;
936 FNegOpc = RISCV::FSGNJN_S;
937 break;
938 case MVT::f64:
939 Opc = RISCV::FLI_D;
940 FNegOpc = RISCV::FSGNJN_D;
941 break;
942 }
943 SDNode *Res = CurDAG->getMachineNode(
944 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
945 if (NeedsFNeg)
946 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
947 SDValue(Res, 0));
948
949 ReplaceNode(Node, Res);
950 return;
951 }
952
953 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
954 SDValue Imm;
955 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
956 // create an integer immediate.
957 if (APF.isPosZero() || NegZeroF64)
958 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
959 else
960 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
961 *Subtarget);
962
963 bool HasZdinx = Subtarget->hasStdExtZdinx();
964 bool Is64Bit = Subtarget->is64Bit();
965 unsigned Opc;
966 switch (VT.SimpleTy) {
967 default:
968 llvm_unreachable("Unexpected size");
969 case MVT::bf16:
970 assert(Subtarget->hasStdExtZfbfmin());
971 Opc = RISCV::FMV_H_X;
972 break;
973 case MVT::f16:
974 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
975 break;
976 case MVT::f32:
977 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
978 break;
979 case MVT::f64:
980 // For RV32, we can't move from a GPR, we need to convert instead. This
981 // should only happen for +0.0 and -0.0.
982 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
983 if (Is64Bit)
984 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
985 else
986 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
987 break;
988 }
989
990 SDNode *Res;
991 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
992 Res = CurDAG->getMachineNode(
993 Opc, DL, VT, Imm,
994 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
995 else
996 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
997
998 // For f64 -0.0, we need to insert a fneg.d idiom.
999 if (NegZeroF64) {
1000 Opc = RISCV::FSGNJN_D;
1001 if (HasZdinx)
1002 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1003 Res =
1004 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1005 }
1006
1007 ReplaceNode(Node, Res);
1008 return;
1009 }
1010 case RISCVISD::BuildPairF64: {
1011 if (!Subtarget->hasStdExtZdinx())
1012 break;
1013
1014 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1015
1016 SDValue Ops[] = {
1017 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1018 Node->getOperand(0),
1019 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1020 Node->getOperand(1),
1021 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1022
1023 SDNode *N =
1024 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1025 ReplaceNode(Node, N);
1026 return;
1027 }
1028 case RISCVISD::SplitF64: {
1029 if (Subtarget->hasStdExtZdinx()) {
1030 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1031
1032 if (!SDValue(Node, 0).use_empty()) {
1033 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1034 Node->getOperand(0));
1035 ReplaceUses(SDValue(Node, 0), Lo);
1036 }
1037
1038 if (!SDValue(Node, 1).use_empty()) {
1039 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1040 Node->getOperand(0));
1041 ReplaceUses(SDValue(Node, 1), Hi);
1042 }
1043
1044 CurDAG->RemoveDeadNode(Node);
1045 return;
1046 }
1047
1048 if (!Subtarget->hasStdExtZfa())
1049 break;
1050 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1051 "Unexpected subtarget");
1052
1053 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1054 if (!SDValue(Node, 0).use_empty()) {
1055 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1056 Node->getOperand(0));
1057 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1058 }
1059 if (!SDValue(Node, 1).use_empty()) {
1060 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1061 Node->getOperand(0));
1062 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1063 }
1064
1065 CurDAG->RemoveDeadNode(Node);
1066 return;
1067 }
1068 case ISD::SHL: {
1069 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1070 if (!N1C)
1071 break;
1072 SDValue N0 = Node->getOperand(0);
1073 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1074 !isa<ConstantSDNode>(N0.getOperand(1)))
1075 break;
1076 unsigned ShAmt = N1C->getZExtValue();
1077 uint64_t Mask = N0.getConstantOperandVal(1);
1078
1079 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1080 // 32 leading zeros and C3 trailing zeros.
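 // Illustrative example (not in the original source): with C2 = 0xFFFFFF00
 // (32 leading zeros, 8 trailing zeros) and C = 4, this emits
 //   srliw x, 8 ; slli x, 12
 // and avoids materializing the 0xFFFFFF00 mask.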
1081 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1082 unsigned XLen = Subtarget->getXLen();
1083 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1084 unsigned TrailingZeros = llvm::countr_zero(Mask);
1085 if (TrailingZeros > 0 && LeadingZeros == 32) {
1086 SDNode *SRLIW = CurDAG->getMachineNode(
1087 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1088 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1089 SDNode *SLLI = CurDAG->getMachineNode(
1090 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1091 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1092 ReplaceNode(Node, SLLI);
1093 return;
1094 }
1095 }
1096 break;
1097 }
1098 case ISD::SRL: {
1099 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1100 if (!N1C)
1101 break;
1102 SDValue N0 = Node->getOperand(0);
1103 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1104 break;
1105 unsigned ShAmt = N1C->getZExtValue();
1106 uint64_t Mask = N0.getConstantOperandVal(1);
1107
1108 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1109 // 32 leading zeros and C3 trailing zeros.
1110 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1111 unsigned XLen = Subtarget->getXLen();
1112 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1113 unsigned TrailingZeros = llvm::countr_zero(Mask);
1114 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1115 SDNode *SRLIW = CurDAG->getMachineNode(
1116 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1117 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1118 SDNode *SLLI = CurDAG->getMachineNode(
1119 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1120 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1121 ReplaceNode(Node, SLLI);
1122 return;
1123 }
1124 }
1125
1126 // Optimize (srl (and X, C2), C) ->
1127 // (srli (slli X, XLen-C3), (XLen-C3) + C)
1128 // Where C2 is a mask with C3 trailing ones.
1129 // Taking into account that the C2 may have had lower bits unset by
1130 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1131 // This pattern occurs when type legalizing right shifts for types with
1132 // less than XLen bits.
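 // Illustrative example (not in the original source): on RV64 with
 // C2 = 0xFFFF (16 trailing ones) and C = 4, this becomes
 //   slli x, 48 ; srli x, 52
 // which extracts bits [15:4] without materializing a constant.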
1133 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1134 if (!isMask_64(Mask))
1135 break;
1136 unsigned TrailingOnes = llvm::countr_one(Mask);
1137 if (ShAmt >= TrailingOnes)
1138 break;
1139 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1140 if (TrailingOnes == 32) {
1141 SDNode *SRLI = CurDAG->getMachineNode(
1142 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1143 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1144 ReplaceNode(Node, SRLI);
1145 return;
1146 }
1147
1148 // Only do the remaining transforms if the AND has one use.
1149 if (!N0.hasOneUse())
1150 break;
1151
1152 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1153 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1154 SDNode *BEXTI = CurDAG->getMachineNode(
1155 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1156 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1157 ReplaceNode(Node, BEXTI);
1158 return;
1159 }
1160
1161 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1162 SDNode *SLLI =
1163 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1164 CurDAG->getTargetConstant(LShAmt, DL, VT));
1165 SDNode *SRLI = CurDAG->getMachineNode(
1166 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1167 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1168 ReplaceNode(Node, SRLI);
1169 return;
1170 }
1171 case ISD::SRA: {
1172 if (trySignedBitfieldExtract(Node))
1173 return;
1174
1175 // Optimize (sra (sext_inreg X, i16), C) ->
1176 // (srai (slli X, XLen-16), (XLen-16) + C)
1177 // And (sra (sext_inreg X, i8), C) ->
1178 // (srai (slli X, XLen-8), (XLen-8) + C)
1179 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1180 // This transform matches the code we get without Zbb. The shifts are more
1181 // compressible, and this can help expose CSE opportunities in the sdiv by
1182 // constant optimization.
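 // Illustrative example (not in the original source): on RV64,
 // (sra (sext_inreg X, i8), 3) becomes
 //   slli x, 56 ; srai x, 59
 // matching the shift pair that would be emitted without Zbb.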
1183 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1184 if (!N1C)
1185 break;
1186 SDValue N0 = Node->getOperand(0);
1187 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1188 break;
1189 unsigned ShAmt = N1C->getZExtValue();
1190 unsigned ExtSize =
1191 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1192 // ExtSize of 32 should use sraiw via tablegen pattern.
1193 if (ExtSize >= 32 || ShAmt >= ExtSize)
1194 break;
1195 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1196 SDNode *SLLI =
1197 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1198 CurDAG->getTargetConstant(LShAmt, DL, VT));
1199 SDNode *SRAI = CurDAG->getMachineNode(
1200 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1201 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1202 ReplaceNode(Node, SRAI);
1203 return;
1204 }
1205 case ISD::OR:
1206 case ISD::XOR:
1207 if (tryShrinkShlLogicImm(Node))
1208 return;
1209
1210 break;
1211 case ISD::AND: {
1212 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1213 if (!N1C)
1214 break;
1215 uint64_t C1 = N1C->getZExtValue();
1216 const bool isC1Mask = isMask_64(C1);
1217 const bool isC1ANDI = isInt<12>(C1);
1218
1219 SDValue N0 = Node->getOperand(0);
1220
1221 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1222 SDValue X, unsigned Msb,
1223 unsigned Lsb) {
1224 if (!Subtarget->hasVendorXTHeadBb())
1225 return false;
1226
1227 SDNode *TH_EXTU = CurDAG->getMachineNode(
1228 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1229 CurDAG->getTargetConstant(Lsb, DL, VT));
1230 ReplaceNode(Node, TH_EXTU);
1231 return true;
1232 };
1233
1234 bool LeftShift = N0.getOpcode() == ISD::SHL;
1235 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1236 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1237 if (!C)
1238 break;
1239 unsigned C2 = C->getZExtValue();
1240 unsigned XLen = Subtarget->getXLen();
1241 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1242
1243 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1244 // shift pair might offer more compression opportunities.
1245 // TODO: We could check for C extension here, but we don't have many lit
1246 // tests with the C extension enabled so not checking gets better
1247 // coverage.
1248 // TODO: What if ANDI is faster than the shift?
1249 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1250
1251 // Clear irrelevant bits in the mask.
1252 if (LeftShift)
1253 C1 &= maskTrailingZeros<uint64_t>(C2);
1254 else
1255 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1256
1257 // Some transforms should only be done if the shift has a single use or
1258 // the AND would become (srli (slli X, 32), 32)
1259 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1260
1261 SDValue X = N0.getOperand(0);
1262
1263 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1264 // with c3 leading zeros.
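 // Illustrative example (not in the original source): on RV64 with c2 = 4,
 // c1 = 0x00FFFFFFFFFFFFFF (c3 = 8 leading zeros) and no XTHeadBb, the
 // fallback below emits "slli x, 4" followed by "srli x, 8".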
1265 if (!LeftShift && isC1Mask) {
1266 unsigned Leading = XLen - llvm::bit_width(C1);
1267 if (C2 < Leading) {
1268 // If the number of leading zeros is C2+32 this can be SRLIW.
1269 if (C2 + 32 == Leading) {
1270 SDNode *SRLIW = CurDAG->getMachineNode(
1271 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1272 ReplaceNode(Node, SRLIW);
1273 return;
1274 }
1275
1276 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1277 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1278 //
1279 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1280 // legalized and goes through DAG combine.
1281 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1282 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1283 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1284 SDNode *SRAIW =
1285 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1286 CurDAG->getTargetConstant(31, DL, VT));
1287 SDNode *SRLIW = CurDAG->getMachineNode(
1288 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1289 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1290 ReplaceNode(Node, SRLIW);
1291 return;
1292 }
1293
1294 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1295 // available.
1296 // Transform (and (srl x, C2), C1)
1297 // -> (<bfextract> x, msb, lsb)
1298 //
1299 // Make sure to keep this below the SRLIW cases, as we always want to
1300 // prefer the more common instruction.
1301 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1302 const unsigned Lsb = C2;
1303 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1304 return;
1305
1306 // (srli (slli x, c3-c2), c3).
1307 // Skip if we could use (zext.w (sraiw X, C2)).
1308 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1309 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1310 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1311 // Also Skip if we can use bexti or th.tst.
1312 Skip |= HasBitTest && Leading == XLen - 1;
1313 if (OneUseOrZExtW && !Skip) {
1314 SDNode *SLLI = CurDAG->getMachineNode(
1315 RISCV::SLLI, DL, VT, X,
1316 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1317 SDNode *SRLI = CurDAG->getMachineNode(
1318 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1319 CurDAG->getTargetConstant(Leading, DL, VT));
1320 ReplaceNode(Node, SRLI);
1321 return;
1322 }
1323 }
1324 }
1325
1326 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1327 // shifted by c2 bits with c3 leading zeros.
1328 if (LeftShift && isShiftedMask_64(C1)) {
1329 unsigned Leading = XLen - llvm::bit_width(C1);
1330
1331 if (C2 + Leading < XLen &&
1332 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1333 // Use slli.uw when possible.
1334 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1335 SDNode *SLLI_UW =
1336 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1337 CurDAG->getTargetConstant(C2, DL, VT));
1338 ReplaceNode(Node, SLLI_UW);
1339 return;
1340 }
1341
1342 // (srli (slli x, c2+c3), c3)
1343 if (OneUseOrZExtW && !IsCANDI) {
1344 SDNode *SLLI = CurDAG->getMachineNode(
1345 RISCV::SLLI, DL, VT, X,
1346 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1347 SDNode *SRLI = CurDAG->getMachineNode(
1348 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1349 CurDAG->getTargetConstant(Leading, DL, VT));
1350 ReplaceNode(Node, SRLI);
1351 return;
1352 }
1353 }
1354 }
1355
1356 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1357 // shifted mask with c2 leading zeros and c3 trailing zeros.
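 // Illustrative example (not in the original source): on RV64 with c2 = 8 and
 // c1 = 0x00FFFFFFFFFFFF00 (8 leading and 8 trailing zeros), this becomes
 //   srli x, 16 ; slli x, 8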
1358 if (!LeftShift && isShiftedMask_64(C1)) {
1359 unsigned Leading = XLen - llvm::bit_width(C1);
1360 unsigned Trailing = llvm::countr_zero(C1);
1361 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1362 !IsCANDI) {
1363 unsigned SrliOpc = RISCV::SRLI;
1364 // If the input is zexti32 we should use SRLIW.
1365 if (X.getOpcode() == ISD::AND &&
1366 isa<ConstantSDNode>(X.getOperand(1)) &&
1367 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1368 SrliOpc = RISCV::SRLIW;
1369 X = X.getOperand(0);
1370 }
1371 SDNode *SRLI = CurDAG->getMachineNode(
1372 SrliOpc, DL, VT, X,
1373 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1374 SDNode *SLLI = CurDAG->getMachineNode(
1375 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1376 CurDAG->getTargetConstant(Trailing, DL, VT));
1377 ReplaceNode(Node, SLLI);
1378 return;
1379 }
1380 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1381 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1382 OneUseOrZExtW && !IsCANDI) {
1383 SDNode *SRLIW = CurDAG->getMachineNode(
1384 RISCV::SRLIW, DL, VT, X,
1385 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1386 SDNode *SLLI = CurDAG->getMachineNode(
1387 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1388 CurDAG->getTargetConstant(Trailing, DL, VT));
1389 ReplaceNode(Node, SLLI);
1390 return;
1391 }
1392 }
1393
1394 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1395 // shifted mask with no leading zeros and c3 trailing zeros.
1396 if (LeftShift && isShiftedMask_64(C1)) {
1397 unsigned Leading = XLen - llvm::bit_width(C1);
1398 unsigned Trailing = llvm::countr_zero(C1);
1399 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1400 SDNode *SRLI = CurDAG->getMachineNode(
1401 RISCV::SRLI, DL, VT, X,
1402 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1403 SDNode *SLLI = CurDAG->getMachineNode(
1404 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1405 CurDAG->getTargetConstant(Trailing, DL, VT));
1406 ReplaceNode(Node, SLLI);
1407 return;
1408 }
1409 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1410 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1411 SDNode *SRLIW = CurDAG->getMachineNode(
1412 RISCV::SRLIW, DL, VT, X,
1413 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1414 SDNode *SLLI = CurDAG->getMachineNode(
1415 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1416 CurDAG->getTargetConstant(Trailing, DL, VT));
1417 ReplaceNode(Node, SLLI);
1418 return;
1419 }
1420 }
1421 }
1422
1423 // If C1 masks off the upper bits only (but can't be formed as an
1424 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1425 // available.
1426 // Transform (and x, C1)
1427 // -> (<bfextract> x, msb, lsb)
1428 if (isC1Mask && !isC1ANDI) {
1429 const unsigned Msb = llvm::bit_width(C1) - 1;
1430 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1431 return;
1432 }
1433
1434 if (tryShrinkShlLogicImm(Node))
1435 return;
1436
1437 break;
1438 }
1439 case ISD::MUL: {
1440 // Special case for calculating (mul (and X, C2), C1) where the full product
1441 // fits in XLen bits. We can shift X left by the number of leading zeros in
1442 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1443 // product has XLen trailing zeros, putting it in the output of MULHU. This
1444 // can avoid materializing a constant in a register for C2.
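 // Illustrative example (not in the original source): for
 // (mul (and X, 0xFFFFFFFF), C1) on RV64 where the true product fits in 64
 // bits, this becomes "slli t0, x, 32" followed by a MULHU of t0 with the
 // materialized constant C1 << 32; the left shift discards the masked-off
 // bits, so 0xFFFFFFFF itself is never materialized.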
1445
1446 // RHS should be a constant.
1447 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1448 if (!N1C || !N1C->hasOneUse())
1449 break;
1450
1451 // LHS should be an AND with constant.
1452 SDValue N0 = Node->getOperand(0);
1453 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1454 break;
1455
1456 uint64_t C2 = N0.getConstantOperandVal(1);
1457
1458 // Constant should be a mask.
1459 if (!isMask_64(C2))
1460 break;
1461
1462 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1463 // multiple users or the constant is a simm12. This prevents inserting a
1464 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1465 // make it more costly to materialize. Otherwise, using a SLLI might allow
1466 // it to be compressed.
1467 bool IsANDIOrZExt =
1468 isInt<12>(C2) ||
1469 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1470 // With XTHeadBb, we can use TH.EXTU.
1471 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1472 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1473 break;
1474 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1475 // the constant is a simm32.
1476 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1477 // With XTHeadBb, we can use TH.EXTU.
1478 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1479 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1480 break;
1481
1482 // We need to shift left the AND input and C1 by a total of XLen bits.
1483
1484 // How far left do we need to shift the AND input?
1485 unsigned XLen = Subtarget->getXLen();
1486 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1487
1488 // The constant gets shifted by the remaining amount unless that would
1489 // shift bits out.
1490 uint64_t C1 = N1C->getZExtValue();
1491 unsigned ConstantShift = XLen - LeadingZeros;
1492 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1493 break;
1494
1495 uint64_t ShiftedC1 = C1 << ConstantShift;
1496 // If this RV32, we need to sign extend the constant.
1497 if (XLen == 32)
1498 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1499
1500 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1501 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1502 SDNode *SLLI =
1503 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1504 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1505 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1506 SDValue(SLLI, 0), SDValue(Imm, 0));
1507 ReplaceNode(Node, MULHU);
1508 return;
1509 }
1510 case ISD::LOAD: {
1511 if (tryIndexedLoad(Node))
1512 return;
1513 break;
1514 }
1515 case ISD::INTRINSIC_WO_CHAIN: {
1516 unsigned IntNo = Node->getConstantOperandVal(0);
1517 switch (IntNo) {
1518 // By default we do not custom select any intrinsic.
1519 default:
1520 break;
1521 case Intrinsic::riscv_vmsgeu:
1522 case Intrinsic::riscv_vmsge: {
1523 SDValue Src1 = Node->getOperand(1);
1524 SDValue Src2 = Node->getOperand(2);
1525 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1526 bool IsCmpUnsignedZero = false;
1527 // Only custom select scalar second operand.
1528 if (Src2.getValueType() != XLenVT)
1529 break;
1530 // Small constants are handled with patterns.
1531 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1532 int64_t CVal = C->getSExtValue();
1533 if (CVal >= -15 && CVal <= 16) {
1534 if (!IsUnsigned || CVal != 0)
1535 break;
1536 IsCmpUnsignedZero = true;
1537 }
1538 }
1539 MVT Src1VT = Src1.getSimpleValueType();
1540 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1541 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1542 default:
1543 llvm_unreachable("Unexpected LMUL!");
1544#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1545 case RISCVII::VLMUL::lmulenum: \
1546 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1547 : RISCV::PseudoVMSLT_VX_##suffix; \
1548 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1549 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1550 break;
1551 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1552 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1553 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1554 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1555 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1556 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1557 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1558#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1559 }
1560 SDValue SEW = CurDAG->getTargetConstant(
1561 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1562 SDValue VL;
1563 selectVLOp(Node->getOperand(3), VL);
1564
1565 // If vmsgeu with 0 immediate, expand it to vmset.
1566 if (IsCmpUnsignedZero) {
1567 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1568 return;
1569 }
1570
1571 // Expand to
1572 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1573 SDValue Cmp = SDValue(
1574 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1575 0);
1576 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1577 {Cmp, Cmp, VL, SEW}));
1578 return;
1579 }
1580 case Intrinsic::riscv_vmsgeu_mask:
1581 case Intrinsic::riscv_vmsge_mask: {
1582 SDValue Src1 = Node->getOperand(2);
1583 SDValue Src2 = Node->getOperand(3);
1584 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1585 bool IsCmpUnsignedZero = false;
1586 // Only custom select scalar second operand.
1587 if (Src2.getValueType() != XLenVT)
1588 break;
1589 // Small constants are handled with patterns.
1590 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1591 int64_t CVal = C->getSExtValue();
1592 if (CVal >= -15 && CVal <= 16) {
1593 if (!IsUnsigned || CVal != 0)
1594 break;
1595 IsCmpUnsignedZero = true;
1596 }
1597 }
1598 MVT Src1VT = Src1.getSimpleValueType();
1599 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1600 VMOROpcode;
1601 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1602 default:
1603 llvm_unreachable("Unexpected LMUL!");
1604#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1605 case RISCVII::VLMUL::lmulenum: \
1606 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1607 : RISCV::PseudoVMSLT_VX_##suffix; \
1608 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1609 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1610 break;
1611 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1612 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1613 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1614 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1615 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1616 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1617 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1618#undef CASE_VMSLT_OPCODES
1619 }
1620 // Mask operations use the LMUL from the mask type.
1621 switch (RISCVTargetLowering::getLMUL(VT)) {
1622 default:
1623 llvm_unreachable("Unexpected LMUL!");
1624#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1625 case RISCVII::VLMUL::lmulenum: \
1626 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1627 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1628 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1629 break;
1630 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1631 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1632 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1633 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1634 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1635 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1636 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1637#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1638 }
1639 SDValue SEW = CurDAG->getTargetConstant(
1640 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1641 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1642 SDValue VL;
1643 selectVLOp(Node->getOperand(5), VL);
1644 SDValue MaskedOff = Node->getOperand(1);
1645 SDValue Mask = Node->getOperand(4);
1646
1647 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1648 if (IsCmpUnsignedZero) {
1649 // We don't need vmor if the MaskedOff and the Mask are the same
1650 // value.
1651 if (Mask == MaskedOff) {
1652 ReplaceUses(Node, Mask.getNode());
1653 return;
1654 }
1655 ReplaceNode(Node,
1656 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1657 {Mask, MaskedOff, VL, MaskSEW}));
1658 return;
1659 }
1660
1661 // If the MaskedOff value and the Mask are the same value use
1662 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1663 // This avoids needing to copy v0 to vd before starting the next sequence.
1664 if (Mask == MaskedOff) {
1665 SDValue Cmp = SDValue(
1666 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1667 0);
1668 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1669 {Mask, Cmp, VL, MaskSEW}));
1670 return;
1671 }
1672
1673 // Mask needs to be copied to V0.
1674 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1675 RISCV::V0, Mask, SDValue());
1676 SDValue Glue = Chain.getValue(1);
1677 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1678
1679 // Otherwise use
1680 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1681 // The result is mask undisturbed.
1682 // We use the same instructions to emulate mask agnostic behavior, because
1683 // the agnostic result can be either undisturbed or all 1.
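// vmsge(u) is the logical negation of vmslt(u), so XORing the masked vmslt
// result with the mask flips exactly the active lanes to the vmsge result
// and leaves the inactive lanes equal to MaskedOff.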
1684 SDValue Cmp = SDValue(
1685 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1686 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1687 0);
1688 // vmxor.mm vd, vd, v0 is used to update active value.
1689 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1690 {Cmp, Mask, VL, MaskSEW}));
1691 return;
1692 }
1693 case Intrinsic::riscv_vsetvli:
1694 case Intrinsic::riscv_vsetvlimax:
1695 return selectVSETVLI(Node);
1696 }
1697 break;
1698 }
1699 case ISD::INTRINSIC_W_CHAIN: {
1700 unsigned IntNo = Node->getConstantOperandVal(1);
1701 switch (IntNo) {
1702 // By default we do not custom select any intrinsic.
1703 default:
1704 break;
1705 case Intrinsic::riscv_vlseg2:
1706 case Intrinsic::riscv_vlseg3:
1707 case Intrinsic::riscv_vlseg4:
1708 case Intrinsic::riscv_vlseg5:
1709 case Intrinsic::riscv_vlseg6:
1710 case Intrinsic::riscv_vlseg7:
1711 case Intrinsic::riscv_vlseg8: {
1712 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1713 return;
1714 }
1715 case Intrinsic::riscv_vlseg2_mask:
1716 case Intrinsic::riscv_vlseg3_mask:
1717 case Intrinsic::riscv_vlseg4_mask:
1718 case Intrinsic::riscv_vlseg5_mask:
1719 case Intrinsic::riscv_vlseg6_mask:
1720 case Intrinsic::riscv_vlseg7_mask:
1721 case Intrinsic::riscv_vlseg8_mask: {
1722 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1723 return;
1724 }
1725 case Intrinsic::riscv_vlsseg2:
1726 case Intrinsic::riscv_vlsseg3:
1727 case Intrinsic::riscv_vlsseg4:
1728 case Intrinsic::riscv_vlsseg5:
1729 case Intrinsic::riscv_vlsseg6:
1730 case Intrinsic::riscv_vlsseg7:
1731 case Intrinsic::riscv_vlsseg8: {
1732 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1733 return;
1734 }
1735 case Intrinsic::riscv_vlsseg2_mask:
1736 case Intrinsic::riscv_vlsseg3_mask:
1737 case Intrinsic::riscv_vlsseg4_mask:
1738 case Intrinsic::riscv_vlsseg5_mask:
1739 case Intrinsic::riscv_vlsseg6_mask:
1740 case Intrinsic::riscv_vlsseg7_mask:
1741 case Intrinsic::riscv_vlsseg8_mask: {
1742 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1743 return;
1744 }
1745 case Intrinsic::riscv_vloxseg2:
1746 case Intrinsic::riscv_vloxseg3:
1747 case Intrinsic::riscv_vloxseg4:
1748 case Intrinsic::riscv_vloxseg5:
1749 case Intrinsic::riscv_vloxseg6:
1750 case Intrinsic::riscv_vloxseg7:
1751 case Intrinsic::riscv_vloxseg8:
1752 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1753 return;
1754 case Intrinsic::riscv_vluxseg2:
1755 case Intrinsic::riscv_vluxseg3:
1756 case Intrinsic::riscv_vluxseg4:
1757 case Intrinsic::riscv_vluxseg5:
1758 case Intrinsic::riscv_vluxseg6:
1759 case Intrinsic::riscv_vluxseg7:
1760 case Intrinsic::riscv_vluxseg8:
1761 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1762 return;
1763 case Intrinsic::riscv_vloxseg2_mask:
1764 case Intrinsic::riscv_vloxseg3_mask:
1765 case Intrinsic::riscv_vloxseg4_mask:
1766 case Intrinsic::riscv_vloxseg5_mask:
1767 case Intrinsic::riscv_vloxseg6_mask:
1768 case Intrinsic::riscv_vloxseg7_mask:
1769 case Intrinsic::riscv_vloxseg8_mask:
1770 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1771 return;
1772 case Intrinsic::riscv_vluxseg2_mask:
1773 case Intrinsic::riscv_vluxseg3_mask:
1774 case Intrinsic::riscv_vluxseg4_mask:
1775 case Intrinsic::riscv_vluxseg5_mask:
1776 case Intrinsic::riscv_vluxseg6_mask:
1777 case Intrinsic::riscv_vluxseg7_mask:
1778 case Intrinsic::riscv_vluxseg8_mask:
1779 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1780 return;
1781 case Intrinsic::riscv_vlseg8ff:
1782 case Intrinsic::riscv_vlseg7ff:
1783 case Intrinsic::riscv_vlseg6ff:
1784 case Intrinsic::riscv_vlseg5ff:
1785 case Intrinsic::riscv_vlseg4ff:
1786 case Intrinsic::riscv_vlseg3ff:
1787 case Intrinsic::riscv_vlseg2ff: {
1788 selectVLSEGFF(Node, /*IsMasked*/ false);
1789 return;
1790 }
1791 case Intrinsic::riscv_vlseg8ff_mask:
1792 case Intrinsic::riscv_vlseg7ff_mask:
1793 case Intrinsic::riscv_vlseg6ff_mask:
1794 case Intrinsic::riscv_vlseg5ff_mask:
1795 case Intrinsic::riscv_vlseg4ff_mask:
1796 case Intrinsic::riscv_vlseg3ff_mask:
1797 case Intrinsic::riscv_vlseg2ff_mask: {
1798 selectVLSEGFF(Node, /*IsMasked*/ true);
1799 return;
1800 }
1801 case Intrinsic::riscv_vloxei:
1802 case Intrinsic::riscv_vloxei_mask:
1803 case Intrinsic::riscv_vluxei:
1804 case Intrinsic::riscv_vluxei_mask: {
1805 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1806 IntNo == Intrinsic::riscv_vluxei_mask;
1807 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1808 IntNo == Intrinsic::riscv_vloxei_mask;
1809
1810 MVT VT = Node->getSimpleValueType(0);
1811 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1812
1813 unsigned CurOp = 2;
1814 SmallVector<SDValue, 8> Operands;
1815 Operands.push_back(Node->getOperand(CurOp++));
1816
1817 MVT IndexVT;
1818 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1819 /*IsStridedOrIndexed*/ true, Operands,
1820 /*IsLoad=*/true, &IndexVT);
1821
1822 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1823 "Element count mismatch");
1824
1825 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1826 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1827 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1828 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1829 report_fatal_error("The V extension does not support EEW=64 for index "
1830 "values when XLEN=32");
1831 }
1832 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1833 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1834 static_cast<unsigned>(IndexLMUL));
1835 MachineSDNode *Load =
1836 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1837
1838 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1839 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1840
1841 ReplaceNode(Node, Load);
1842 return;
1843 }
1844 case Intrinsic::riscv_vlm:
1845 case Intrinsic::riscv_vle:
1846 case Intrinsic::riscv_vle_mask:
1847 case Intrinsic::riscv_vlse:
1848 case Intrinsic::riscv_vlse_mask: {
1849 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1850 IntNo == Intrinsic::riscv_vlse_mask;
1851 bool IsStrided =
1852 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1853
1854 MVT VT = Node->getSimpleValueType(0);
1855 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1856
1857 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1858 // operand at the IR level. In pseudos, it has both a policy and a
1859 // passthru operand. The passthru operand is needed to track the
1860 // "tail undefined" state, and the policy is there just for
1861 // consistency - it will always be "don't care" for the
1862 // unmasked form.
1863 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1864 unsigned CurOp = 2;
1865 SmallVector<SDValue, 8> Operands;
1866 if (HasPassthruOperand)
1867 Operands.push_back(Node->getOperand(CurOp++));
1868 else {
1869 // We eagerly lower to implicit_def (instead of undef), as we
1870 // otherwise fail to select nodes such as: nxv1i1 = undef
1871 SDNode *Passthru =
1872 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1873 Operands.push_back(SDValue(Passthru, 0));
1874 }
1875 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1876 Operands, /*IsLoad=*/true);
1877
1878 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1879 const RISCV::VLEPseudo *P =
1880 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1881 static_cast<unsigned>(LMUL));
1882 MachineSDNode *Load =
1883 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1884
1885 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1886 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1887
1888 ReplaceNode(Node, Load);
1889 return;
1890 }
1891 case Intrinsic::riscv_vleff:
1892 case Intrinsic::riscv_vleff_mask: {
1893 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1894
1895 MVT VT = Node->getSimpleValueType(0);
1896 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1897
1898 unsigned CurOp = 2;
1899 SmallVector<SDValue, 8> Operands;
1900 Operands.push_back(Node->getOperand(CurOp++));
1901 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1902 /*IsStridedOrIndexed*/ false, Operands,
1903 /*IsLoad=*/true);
1904
1905 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1906 const RISCV::VLEPseudo *P =
1907 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1908 Log2SEW, static_cast<unsigned>(LMUL));
1909 MachineSDNode *Load = CurDAG->getMachineNode(
1910 P->Pseudo, DL, Node->getVTList(), Operands);
1911 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1912 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1913
1914 ReplaceNode(Node, Load);
1915 return;
1916 }
1917 }
1918 break;
1919 }
1920 case ISD::INTRINSIC_VOID: {
1921 unsigned IntNo = Node->getConstantOperandVal(1);
1922 switch (IntNo) {
1923 case Intrinsic::riscv_vsseg2:
1924 case Intrinsic::riscv_vsseg3:
1925 case Intrinsic::riscv_vsseg4:
1926 case Intrinsic::riscv_vsseg5:
1927 case Intrinsic::riscv_vsseg6:
1928 case Intrinsic::riscv_vsseg7:
1929 case Intrinsic::riscv_vsseg8: {
1930 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1931 return;
1932 }
1933 case Intrinsic::riscv_vsseg2_mask:
1934 case Intrinsic::riscv_vsseg3_mask:
1935 case Intrinsic::riscv_vsseg4_mask:
1936 case Intrinsic::riscv_vsseg5_mask:
1937 case Intrinsic::riscv_vsseg6_mask:
1938 case Intrinsic::riscv_vsseg7_mask:
1939 case Intrinsic::riscv_vsseg8_mask: {
1940 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1941 return;
1942 }
1943 case Intrinsic::riscv_vssseg2:
1944 case Intrinsic::riscv_vssseg3:
1945 case Intrinsic::riscv_vssseg4:
1946 case Intrinsic::riscv_vssseg5:
1947 case Intrinsic::riscv_vssseg6:
1948 case Intrinsic::riscv_vssseg7:
1949 case Intrinsic::riscv_vssseg8: {
1950 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1951 return;
1952 }
1953 case Intrinsic::riscv_vssseg2_mask:
1954 case Intrinsic::riscv_vssseg3_mask:
1955 case Intrinsic::riscv_vssseg4_mask:
1956 case Intrinsic::riscv_vssseg5_mask:
1957 case Intrinsic::riscv_vssseg6_mask:
1958 case Intrinsic::riscv_vssseg7_mask:
1959 case Intrinsic::riscv_vssseg8_mask: {
1960 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1961 return;
1962 }
1963 case Intrinsic::riscv_vsoxseg2:
1964 case Intrinsic::riscv_vsoxseg3:
1965 case Intrinsic::riscv_vsoxseg4:
1966 case Intrinsic::riscv_vsoxseg5:
1967 case Intrinsic::riscv_vsoxseg6:
1968 case Intrinsic::riscv_vsoxseg7:
1969 case Intrinsic::riscv_vsoxseg8:
1970 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1971 return;
1972 case Intrinsic::riscv_vsuxseg2:
1973 case Intrinsic::riscv_vsuxseg3:
1974 case Intrinsic::riscv_vsuxseg4:
1975 case Intrinsic::riscv_vsuxseg5:
1976 case Intrinsic::riscv_vsuxseg6:
1977 case Intrinsic::riscv_vsuxseg7:
1978 case Intrinsic::riscv_vsuxseg8:
1979 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1980 return;
1981 case Intrinsic::riscv_vsoxseg2_mask:
1982 case Intrinsic::riscv_vsoxseg3_mask:
1983 case Intrinsic::riscv_vsoxseg4_mask:
1984 case Intrinsic::riscv_vsoxseg5_mask:
1985 case Intrinsic::riscv_vsoxseg6_mask:
1986 case Intrinsic::riscv_vsoxseg7_mask:
1987 case Intrinsic::riscv_vsoxseg8_mask:
1988 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1989 return;
1990 case Intrinsic::riscv_vsuxseg2_mask:
1991 case Intrinsic::riscv_vsuxseg3_mask:
1992 case Intrinsic::riscv_vsuxseg4_mask:
1993 case Intrinsic::riscv_vsuxseg5_mask:
1994 case Intrinsic::riscv_vsuxseg6_mask:
1995 case Intrinsic::riscv_vsuxseg7_mask:
1996 case Intrinsic::riscv_vsuxseg8_mask:
1997 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1998 return;
1999 case Intrinsic::riscv_vsoxei:
2000 case Intrinsic::riscv_vsoxei_mask:
2001 case Intrinsic::riscv_vsuxei:
2002 case Intrinsic::riscv_vsuxei_mask: {
2003 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2004 IntNo == Intrinsic::riscv_vsuxei_mask;
2005 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2006 IntNo == Intrinsic::riscv_vsoxei_mask;
2007
2008 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2009 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2010
2011 unsigned CurOp = 2;
2012 SmallVector<SDValue, 8> Operands;
2013 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2014
2015 MVT IndexVT;
2016 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2017 /*IsStridedOrIndexed*/ true, Operands,
2018 /*IsLoad=*/false, &IndexVT);
2019
2020 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2021 "Element count mismatch");
2022
2023 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2024 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2025 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2026 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2027 report_fatal_error("The V extension does not support EEW=64 for index "
2028 "values when XLEN=32");
2029 }
2030 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2031 IsMasked, IsOrdered, IndexLog2EEW,
2032 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2033 MachineSDNode *Store =
2034 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2035
2036 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2037 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2038
2039 ReplaceNode(Node, Store);
2040 return;
2041 }
2042 case Intrinsic::riscv_vsm:
2043 case Intrinsic::riscv_vse:
2044 case Intrinsic::riscv_vse_mask:
2045 case Intrinsic::riscv_vsse:
2046 case Intrinsic::riscv_vsse_mask: {
2047 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2048 IntNo == Intrinsic::riscv_vsse_mask;
2049 bool IsStrided =
2050 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2051
2052 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2053 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2054
2055 unsigned CurOp = 2;
2056 SmallVector<SDValue, 8> Operands;
2057 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2058
2059 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2060 Operands);
2061
2062 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2063 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2064 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2065 MachineSDNode *Store =
2066 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2067 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2068 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2069
2070 ReplaceNode(Node, Store);
2071 return;
2072 }
2073 case Intrinsic::riscv_sf_vc_x_se:
2074 case Intrinsic::riscv_sf_vc_i_se:
2075 selectSF_VC_X_SE(Node);
2076 return;
2077 }
2078 break;
2079 }
2080 case ISD::BITCAST: {
2081 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2082 // Just drop bitcasts between vectors if both are fixed or both are
2083 // scalable.
2084 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2085 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2086 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2087 CurDAG->RemoveDeadNode(Node);
2088 return;
2089 }
2090 break;
2091 }
2092 case ISD::INSERT_SUBVECTOR: {
2093 SDValue V = Node->getOperand(0);
2094 SDValue SubV = Node->getOperand(1);
2095 SDLoc DL(SubV);
2096 auto Idx = Node->getConstantOperandVal(2);
2097 MVT SubVecVT = SubV.getSimpleValueType();
2098
2099 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2100 MVT SubVecContainerVT = SubVecVT;
2101 // Establish the correct scalable-vector types for any fixed-length type.
2102 if (SubVecVT.isFixedLengthVector()) {
2103 assert(Idx == 0 && V.isUndef());
2104 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2105 }
2106 MVT ContainerVT = VT;
2107 if (VT.isFixedLengthVector())
2108 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2109
2110 const auto *TRI = Subtarget->getRegisterInfo();
2111 unsigned SubRegIdx;
2112 std::tie(SubRegIdx, Idx) =
2113 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2114 ContainerVT, SubVecContainerVT, Idx, TRI);
2115
2116 // If the Idx hasn't been completely eliminated then this is a subvector
2117 // insert which doesn't naturally align to a vector register. These must
2118 // be handled using instructions to manipulate the vector registers.
2119 if (Idx != 0)
2120 break;
2121
2122 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2123 [[maybe_unused]] bool IsSubVecPartReg =
2124 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2125 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2126 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2127 assert((!IsSubVecPartReg || V.isUndef()) &&
2128 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2129 "the subvector is smaller than a full-sized register");
2130
2131 // If we haven't set a SubRegIdx, then we must be going between
2132 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2133 if (SubRegIdx == RISCV::NoSubRegister) {
2134 unsigned InRegClassID =
2135 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2136 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2137 InRegClassID &&
2138 "Unexpected subvector extraction");
2139 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2140 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2141 DL, VT, SubV, RC);
2142 ReplaceNode(Node, NewNode);
2143 return;
2144 }
2145
2146 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2147 ReplaceNode(Node, Insert.getNode());
2148 return;
2149 }
2150 case ISD::EXTRACT_SUBVECTOR: {
2151 SDValue V = Node->getOperand(0);
2152 auto Idx = Node->getConstantOperandVal(1);
2153 MVT InVT = V.getSimpleValueType();
2154 SDLoc DL(V);
2155
2156 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2157 MVT SubVecContainerVT = VT;
2158 // Establish the correct scalable-vector types for any fixed-length type.
2159 if (VT.isFixedLengthVector()) {
2160 assert(Idx == 0);
2161 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2162 }
2163 if (InVT.isFixedLengthVector())
2164 InVT = TLI.getContainerForFixedLengthVector(InVT);
2165
2166 const auto *TRI = Subtarget->getRegisterInfo();
2167 unsigned SubRegIdx;
2168 std::tie(SubRegIdx, Idx) =
2169 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2170 InVT, SubVecContainerVT, Idx, TRI);
2171
2172 // If the Idx hasn't been completely eliminated then this is a subvector
2173 // extract which doesn't naturally align to a vector register. These must
2174 // be handled using instructions to manipulate the vector registers.
2175 if (Idx != 0)
2176 break;
2177
2178 // If we haven't set a SubRegIdx, then we must be going between
2179 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2180 if (SubRegIdx == RISCV::NoSubRegister) {
2181 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2182 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2183 InRegClassID &&
2184 "Unexpected subvector extraction");
2185 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2186 SDNode *NewNode =
2187 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2188 ReplaceNode(Node, NewNode);
2189 return;
2190 }
2191
2192 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2193 ReplaceNode(Node, Extract.getNode());
2194 return;
2195 }
2196 case RISCVISD::VMV_S_X_VL:
2197 case RISCVISD::VFMV_S_F_VL:
2198 case RISCVISD::VMV_V_X_VL:
2199 case RISCVISD::VFMV_V_F_VL: {
2200 // Try to match splat of a scalar load to a strided load with stride of x0.
2201 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2202 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2203 if (!Node->getOperand(0).isUndef())
2204 break;
2205 SDValue Src = Node->getOperand(1);
2206 auto *Ld = dyn_cast<LoadSDNode>(Src);
2207 // Can't fold a load with update addressing: its second output (the
2208 // updated address) has uses, so the load node can't be removed.
2209 if (!Ld || Ld->isIndexed())
2210 break;
2211 EVT MemVT = Ld->getMemoryVT();
2212 // The memory VT should be the same size as the element type.
2213 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2214 break;
2215 if (!IsProfitableToFold(Src, Node, Node) ||
2216 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2217 break;
2218
2219 SDValue VL;
2220 if (IsScalarMove) {
2221 // We could deal with more VL if we update the VSETVLI insert pass to
2222 // avoid introducing more VSETVLI.
2223 if (!isOneConstant(Node->getOperand(2)))
2224 break;
2225 selectVLOp(Node->getOperand(2), VL);
2226 } else
2227 selectVLOp(Node->getOperand(2), VL);
2228
2229 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2230 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2231
2232 // If VL=1, then we don't need to do a strided load and can just do a
2233 // regular load.
2234 bool IsStrided = !isOneConstant(VL);
2235
2236 // Only do a strided load if we have optimized zero-stride vector load.
2237 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2238 break;
2239
2240 SmallVector<SDValue> Operands = {
2241 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2242 Ld->getBasePtr()};
2243 if (IsStrided)
2244 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2245 uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2246 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2247 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2248
2249 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2250 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2251 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2252 Log2SEW, static_cast<unsigned>(LMUL));
2253 MachineSDNode *Load =
2254 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2255 // Update the chain.
2256 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2257 // Record the mem-refs
2258 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2259 // Replace the splat with the vlse.
2260 ReplaceNode(Node, Load);
2261 return;
2262 }
2263 case ISD::PREFETCH:
2264 unsigned Locality = Node->getConstantOperandVal(3);
2265 if (Locality > 2)
2266 break;
2267
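// Locality 3 (maximum temporal locality) keeps the default prefetch;
// localities 0, 1 and 2 are mapped to the non-temporal hints NTL.ALL,
// NTL.PALL and NTL.P1 below.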
2268 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2269 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2270 MMO->setFlags(MachineMemOperand::MONonTemporal);
2271
2272 int NontemporalLevel = 0;
2273 switch (Locality) {
2274 case 0:
2275 NontemporalLevel = 3; // NTL.ALL
2276 break;
2277 case 1:
2278 NontemporalLevel = 1; // NTL.PALL
2279 break;
2280 case 2:
2281 NontemporalLevel = 0; // NTL.P1
2282 break;
2283 default:
2284 llvm_unreachable("unexpected locality value.");
2285 }
2286
2287 if (NontemporalLevel & 0b1)
2288 MMO->setFlags(MONontemporalBit0);
2289 if (NontemporalLevel & 0b10)
2290 MMO->setFlags(MONontemporalBit1);
2291 }
2292 break;
2293 }
2294
2295 // Select the default instruction.
2296 SelectCode(Node);
2297}
2298
2299bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2300 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2301 std::vector<SDValue> &OutOps) {
2302 // Always produce a register and immediate operand, as expected by
2303 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2304 switch (ConstraintID) {
2305 case InlineAsm::ConstraintCode::o:
2306 case InlineAsm::ConstraintCode::m: {
2307 SDValue Op0, Op1;
2308 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2309 assert(Found && "SelectAddrRegImm should always succeed");
2310 OutOps.push_back(Op0);
2311 OutOps.push_back(Op1);
2312 return false;
2313 }
2314 case InlineAsm::ConstraintCode::A:
2315 OutOps.push_back(Op);
2316 OutOps.push_back(
2317 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2318 return false;
2319 default:
2320 report_fatal_error("Unexpected asm memory constraint " +
2321 InlineAsm::getMemConstraintName(ConstraintID));
2322 }
2323
2324 return true;
2325}
2326
2327bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2328 SDValue &Offset) {
2329 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2330 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2331 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2332 return true;
2333 }
2334
2335 return false;
2336}
2337
2338// Select a frame index and an optional immediate offset from an ADD or OR.
2339bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2340 SDValue &Offset) {
2341 if (SelectAddrFrameIndex(Addr, Base, Offset))
2342 return true;
2343
2344 if (!CurDAG->isBaseWithConstantOffset(Addr))
2345 return false;
2346
2347 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2348 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2349 if (isInt<12>(CVal)) {
2350 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2351 Subtarget->getXLenVT());
2352 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2353 Subtarget->getXLenVT());
2354 return true;
2355 }
2356 }
2357
2358 return false;
2359}
2360
2361// Fold constant addresses.
2362static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2363 const MVT VT, const RISCVSubtarget *Subtarget,
2364 SDValue Addr, SDValue &Base, SDValue &Offset,
2365 bool IsPrefetch = false) {
2366 if (!isa<ConstantSDNode>(Addr))
2367 return false;
2368
2369 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2370
2371 // If the constant is a simm12, we can fold the whole constant and use X0 as
2372 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2373 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
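  // For example, the constant address 0x12345678 splits into Lo12 = 0x678 and
  // Hi20 = 0x12345, so it is selected as LUI 0x12345 plus a 0x678 offset on
  // the memory access.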
2374 int64_t Lo12 = SignExtend64<12>(CVal);
2375 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2376 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2377 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2378 return false;
2379
2380 if (Hi) {
2381 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2382 Base = SDValue(
2383 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2384 CurDAG->getTargetConstant(Hi20, DL, VT)),
2385 0);
2386 } else {
2387 Base = CurDAG->getRegister(RISCV::X0, VT);
2388 }
2389 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2390 return true;
2391 }
2392
2393 // Ask how constant materialization would handle this constant.
2394 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2395
2396 // If the last instruction would be an ADDI, we can fold its immediate and
2397 // emit the rest of the sequence as the base.
2398 if (Seq.back().getOpcode() != RISCV::ADDI)
2399 return false;
2400 Lo12 = Seq.back().getImm();
2401 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2402 return false;
2403
2404 // Drop the last instruction.
2405 Seq.pop_back();
2406 assert(!Seq.empty() && "Expected more instructions in sequence");
2407
2408 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2409 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2410 return true;
2411}
2412
2413// Is this ADD instruction only used as the base pointer of scalar loads and
2414// stores?
2415static bool isWorthFoldingAdd(SDValue Add) {
2416 for (auto *Use : Add->uses()) {
2417 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2418 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2419 Use->getOpcode() != ISD::ATOMIC_STORE)
2420 return false;
2421 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2422 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2423 VT != MVT::f64)
2424 return false;
2425 // Don't allow stores of the value. It must be used as the address.
2426 if (Use->getOpcode() == ISD::STORE &&
2427 cast<StoreSDNode>(Use)->getValue() == Add)
2428 return false;
2429 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2430 cast<AtomicSDNode>(Use)->getVal() == Add)
2431 return false;
2432 }
2433
2434 return true;
2435}
2436
2437bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2438 unsigned MaxShiftAmount,
2439 SDValue &Base, SDValue &Index,
2440 SDValue &Scale) {
2441 EVT VT = Addr.getSimpleValueType();
2442 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2443 SDValue &Shift) {
2444 uint64_t ShiftAmt = 0;
2445 Index = N;
2446
2447 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2448 // Only match shifts by a value in range [0, MaxShiftAmount].
2449 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2450 Index = N.getOperand(0);
2451 ShiftAmt = N.getConstantOperandVal(1);
2452 }
2453 }
2454
2455 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2456 return ShiftAmt != 0;
2457 };
2458
2459 if (Addr.getOpcode() == ISD::ADD) {
2460 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2461 SDValue AddrB = Addr.getOperand(0);
2462 if (AddrB.getOpcode() == ISD::ADD &&
2463 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2464 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2465 isInt<12>(C1->getSExtValue())) {
2466 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2467 SDValue C1Val =
2468 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2469 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2470 AddrB.getOperand(1), C1Val),
2471 0);
2472 return true;
2473 }
2474 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2475 Base = Addr.getOperand(1);
2476 return true;
2477 } else {
2478 UnwrapShl(Addr.getOperand(1), Index, Scale);
2479 Base = Addr.getOperand(0);
2480 return true;
2481 }
2482 } else if (UnwrapShl(Addr, Index, Scale)) {
2483 EVT VT = Addr.getValueType();
2484 Base = CurDAG->getRegister(RISCV::X0, VT);
2485 return true;
2486 }
2487
2488 return false;
2489}
2490
2491bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2492 SDValue &Offset, bool IsINX) {
2493 if (SelectAddrFrameIndex(Addr, Base, Offset))
2494 return true;
2495
2496 SDLoc DL(Addr);
2497 MVT VT = Addr.getSimpleValueType();
2498
2499 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2500 Base = Addr.getOperand(0);
2501 Offset = Addr.getOperand(1);
2502 return true;
2503 }
2504
2505 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2506 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2507 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2508 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2509 Base = Addr.getOperand(0);
2510 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2511 SDValue LoOperand = Base.getOperand(1);
2512 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2513 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2514 // (its low part, really), then we can rely on the alignment of that
2515 // variable to provide a margin of safety before low part can overflow
2516 // the 12 bits of the load/store offset. Check if CVal falls within
2517 // that margin; if so (low part + CVal) can't overflow.
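        // For example, an 8-byte-aligned global has a %lo part that is a
        // multiple of 8, so offsets CVal in [0, 7] can be folded into the
        // same %lo relocation without overflowing the simm12 range.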
2518 const DataLayout &DL = CurDAG->getDataLayout();
2519 Align Alignment = commonAlignment(
2520 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2521 if (CVal == 0 || Alignment > CVal) {
2522 int64_t CombinedOffset = CVal + GA->getOffset();
2523 Base = Base.getOperand(0);
2524 Offset = CurDAG->getTargetGlobalAddress(
2525 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2526 CombinedOffset, GA->getTargetFlags());
2527 return true;
2528 }
2529 }
2530 }
2531
2532 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2533 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2534 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2535 return true;
2536 }
2537 }
2538
2539 // Handle ADD with large immediates.
2540 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2541 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2542 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2543 "simm12 not already handled?");
2544
2545 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2546 // an ADDI for part of the offset and fold the rest into the load/store.
2547 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
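    // For example, an offset of 3000 becomes ADDI base, 2047 with the
    // remaining 953 folded into the load/store immediate.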
2548 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2549 int64_t Adj = CVal < 0 ? -2048 : 2047;
2550 Base = SDValue(
2551 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2552 CurDAG->getTargetConstant(Adj, DL, VT)),
2553 0);
2554 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2555 return true;
2556 }
2557
2558 // For larger immediates, we might be able to save one instruction from
2559 // constant materialization by folding the Lo12 bits of the immediate into
2560 // the address. We should only do this if the ADD is only used by loads and
2561 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2562 // separately with the full materialized immediate creating extra
2563 // instructions.
2564 if (isWorthFoldingAdd(Addr) &&
2565 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2566 Offset)) {
2567 // Insert an ADD instruction with the materialized Hi52 bits.
2568 Base = SDValue(
2569 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2570 0);
2571 return true;
2572 }
2573 }
2574
2575 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2576 return true;
2577
2578 Base = Addr;
2579 Offset = CurDAG->getTargetConstant(0, DL, VT);
2580 return true;
2581}
2582
2583/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2584/// Offset should be all zeros.
2585bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2586 SDValue &Offset) {
2587 if (SelectAddrFrameIndex(Addr, Base, Offset))
2588 return true;
2589
2590 SDLoc DL(Addr);
2591 MVT VT = Addr.getSimpleValueType();
2592
2593 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2594 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2595 if (isInt<12>(CVal)) {
2596 Base = Addr.getOperand(0);
2597
2598 // Early-out if not a valid offset.
2599 if ((CVal & 0b11111) != 0) {
2600 Base = Addr;
2601 Offset = CurDAG->getTargetConstant(0, DL, VT);
2602 return true;
2603 }
2604
2605 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2606 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2607 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2608 return true;
2609 }
2610 }
2611
2612 // Handle ADD with large immediates.
2613 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2614 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2615 assert(!isInt<12>(CVal) &&
2616 "simm12 not already handled?");
2617
2618 // Handle immediates in the range [-4096,-2049] or [2017, 4063]. We can save
2619 // one instruction by folding an adjustment (-2048 or 2016) into the address.
2620 if ((-2049 >= CVal && CVal >= -4096) || (4063 >= CVal && CVal >= 2017)) {
2621 int64_t Adj = CVal < 0 ? -2048 : 2016;
2622 int64_t AdjustedOffset = CVal - Adj;
2623 Base = SDValue(CurDAG->getMachineNode(
2624 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2625 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2626 0);
2627 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2628 return true;
2629 }
2630
2631 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2632 Offset, true)) {
2633 // Insert an ADD instruction with the materialized Hi52 bits.
2634 Base = SDValue(
2635 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2636 0);
2637 return true;
2638 }
2639 }
2640
2641 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2642 return true;
2643
2644 Base = Addr;
2645 Offset = CurDAG->getTargetConstant(0, DL, VT);
2646 return true;
2647}
2648
2649bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2650 SDValue &ShAmt) {
2651 ShAmt = N;
2652
2653 // Peek through zext.
2654 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2655 ShAmt = ShAmt.getOperand(0);
2656
2657 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2658 // amount. If there is an AND on the shift amount, we can bypass it if it
2659 // doesn't affect any of those bits.
2660 if (ShAmt.getOpcode() == ISD::AND &&
2661 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2662 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2663
2664 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2665 // mask that covers the bits needed to represent all shift amounts.
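    // For example, with ShiftWidth == 64 only bits [5:0] of the amount are
    // read, so an AND whose mask has those six bits set (ShMask == 63 is a
    // subset of it) can be bypassed.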
2666 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2667 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2668
2669 if (ShMask.isSubsetOf(AndMask)) {
2670 ShAmt = ShAmt.getOperand(0);
2671 } else {
2672 // SimplifyDemandedBits may have optimized the mask so try restoring any
2673 // bits that are known zero.
2674 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2675 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2676 return true;
2677 ShAmt = ShAmt.getOperand(0);
2678 }
2679 }
2680
2681 if (ShAmt.getOpcode() == ISD::ADD &&
2682 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2683 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2684 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2685 // to avoid the ADD.
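    // For example, a shift amount of (add X, 64) on RV64 shifts by the same
    // amount as X alone, since only the low six bits are read.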
2686 if (Imm != 0 && Imm % ShiftWidth == 0) {
2687 ShAmt = ShAmt.getOperand(0);
2688 return true;
2689 }
2690 } else if (ShAmt.getOpcode() == ISD::SUB &&
2691 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2692 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2693 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2694 // generate a NEG instead of a SUB of a constant.
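    // For example, a shift amount of (64 - X) on RV64 is congruent to -X
    // modulo 64, so a single NEG of X suffices.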
2695 if (Imm != 0 && Imm % ShiftWidth == 0) {
2696 SDLoc DL(ShAmt);
2697 EVT VT = ShAmt.getValueType();
2698 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2699 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2700 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2701 ShAmt.getOperand(1));
2702 ShAmt = SDValue(Neg, 0);
2703 return true;
2704 }
2705 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2706 // to generate a NOT instead of a SUB of a constant.
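    // For example, a shift amount of (63 - X) on RV64 equals ~X modulo 64
    // (since ~X == -X - 1), so XORI X, -1 produces it directly.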
2707 if (Imm % ShiftWidth == ShiftWidth - 1) {
2708 SDLoc DL(ShAmt);
2709 EVT VT = ShAmt.getValueType();
2710 MachineSDNode *Not =
2711 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2712 CurDAG->getTargetConstant(-1, DL, VT));
2713 ShAmt = SDValue(Not, 0);
2714 return true;
2715 }
2716 }
2717
2718 return true;
2719}
2720
2721/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2722/// check for equality with 0. This function emits instructions that convert the
2723/// seteq/setne into something that can be compared with 0.
2724/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2725/// ISD::SETNE).
2726bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2727 SDValue &Val) {
2728 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2729 "Unexpected condition code!");
2730
2731 // We're looking for a setcc.
2732 if (N->getOpcode() != ISD::SETCC)
2733 return false;
2734
2735 // Must be an equality comparison.
2736 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2737 if (CCVal != ExpectedCCVal)
2738 return false;
2739
2740 SDValue LHS = N->getOperand(0);
2741 SDValue RHS = N->getOperand(1);
2742
2743 if (!LHS.getValueType().isScalarInteger())
2744 return false;
2745
2746 // If the RHS is 0, we don't need any extra instructions; return the LHS.
2747 if (isNullConstant(RHS)) {
2748 Val = LHS;
2749 return true;
2750 }
2751
2752 SDLoc DL(N);
2753
2754 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2755 int64_t CVal = C->getSExtValue();
2756 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2757 // non-zero otherwise.
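    // (ADDI can't be used for this case: negating -2048 gives 2048, which
    // does not fit in a simm12.)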
2758 if (CVal == -2048) {
2759 Val =
2760 SDValue(CurDAG->getMachineNode(
2761 RISCV::XORI, DL, N->getValueType(0), LHS,
2762 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2763 0);
2764 return true;
2765 }
2766 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2767 // LHS is equal to the RHS and non-zero otherwise.
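    // For example, RHS == 100 becomes ADDI LHS, -100. RHS == 2048 is also
    // allowed because its negation, -2048, is a valid simm12.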
2768 if (isInt<12>(CVal) || CVal == 2048) {
2769 Val =
2770 SDValue(CurDAG->getMachineNode(
2771 RISCV::ADDI, DL, N->getValueType(0), LHS,
2772 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2773 0);
2774 return true;
2775 }
2776 }
2777
2778 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2779 // equal and a non-zero value if they aren't.
2780 Val = SDValue(
2781 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2782 return true;
2783}
2784
2785bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2786 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2787 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2788 Val = N.getOperand(0);
2789 return true;
2790 }
2791
2792 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2793 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2794 return N;
2795
2796 SDValue N0 = N.getOperand(0);
2797 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2798 N.getConstantOperandVal(1) == ShiftAmt &&
2799 N0.getConstantOperandVal(1) == ShiftAmt)
2800 return N0.getOperand(0);
2801
2802 return N;
2803 };
2804
2805 MVT VT = N.getSimpleValueType();
2806 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2807 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2808 return true;
2809 }
2810
2811 return false;
2812}
2813
2814bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2815 if (N.getOpcode() == ISD::AND) {
2816 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2817 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2818 Val = N.getOperand(0);
2819 return true;
2820 }
2821 }
2822 MVT VT = N.getSimpleValueType();
2823 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2824 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2825 Val = N;
2826 return true;
2827 }
2828
2829 return false;
2830}
2831
2832/// Look for various patterns that can be done with a SHL that can be folded
2833/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2834/// SHXADD we are trying to match.
2835bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2836 SDValue &Val) {
2837 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2838 SDValue N0 = N.getOperand(0);
2839
2840 bool LeftShift = N0.getOpcode() == ISD::SHL;
2841 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2842 isa<ConstantSDNode>(N0.getOperand(1))) {
2843 uint64_t Mask = N.getConstantOperandVal(1);
2844 unsigned C2 = N0.getConstantOperandVal(1);
2845
2846 unsigned XLen = Subtarget->getXLen();
2847 if (LeftShift)
2848 Mask &= maskTrailingZeros<uint64_t>(C2);
2849 else
2850 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2851
2852 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2853 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2854 // followed by a SHXADD with c3 for the X amount.
2855 if (isShiftedMask_64(Mask)) {
2856 unsigned Leading = XLen - llvm::bit_width(Mask);
2857 unsigned Trailing = llvm::countr_zero(Mask);
2858 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2859 SDLoc DL(N);
2860 EVT VT = N.getValueType();
2861 Val = SDValue(CurDAG->getMachineNode(
2862 RISCV::SRLI, DL, VT, N0.getOperand(0),
2863 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2864 0);
2865 return true;
2866 }
2867 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2868 // leading zeros and c3 trailing zeros. We can use an SRLI by C3
2869 // followed by a SHXADD using c3 for the X amount.
2870 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2871 SDLoc DL(N);
2872 EVT VT = N.getValueType();
2873 Val = SDValue(
2874 CurDAG->getMachineNode(
2875 RISCV::SRLI, DL, VT, N0.getOperand(0),
2876 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2877 0);
2878 return true;
2879 }
2880 }
2881 }
2882 }
2883
2884 bool LeftShift = N.getOpcode() == ISD::SHL;
2885 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2886 isa<ConstantSDNode>(N.getOperand(1))) {
2887 SDValue N0 = N.getOperand(0);
2888 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2889 isa<ConstantSDNode>(N0.getOperand(1))) {
2890 uint64_t Mask = N0.getConstantOperandVal(1);
2891 if (isShiftedMask_64(Mask)) {
2892 unsigned C1 = N.getConstantOperandVal(1);
2893 unsigned XLen = Subtarget->getXLen();
2894 unsigned Leading = XLen - llvm::bit_width(Mask);
2895 unsigned Trailing = llvm::countr_zero(Mask);
2896 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2897 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2898 if (LeftShift && Leading == 32 && Trailing > 0 &&
2899 (Trailing + C1) == ShAmt) {
2900 SDLoc DL(N);
2901 EVT VT = N.getValueType();
2902 Val = SDValue(CurDAG->getMachineNode(
2903 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2904 CurDAG->getTargetConstant(Trailing, DL, VT)),
2905 0);
2906 return true;
2907 }
2908 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2909 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2910 if (!LeftShift && Leading == 32 && Trailing > C1 &&
2911 (Trailing - C1) == ShAmt) {
2912 SDLoc DL(N);
2913 EVT VT = N.getValueType();
2914 Val = SDValue(CurDAG->getMachineNode(
2915 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2916 CurDAG->getTargetConstant(Trailing, DL, VT)),
2917 0);
2918 return true;
2919 }
2920 }
2921 }
2922 }
2923
2924 return false;
2925}
2926
2927/// Look for various patterns that can be done with a SHL that can be folded
2928/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2929/// SHXADD_UW we are trying to match.
2930bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
2931 SDValue &Val) {
2932 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2933 N.hasOneUse()) {
2934 SDValue N0 = N.getOperand(0);
2935 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2936 N0.hasOneUse()) {
2937 uint64_t Mask = N.getConstantOperandVal(1);
2938 unsigned C2 = N0.getConstantOperandVal(1);
2939
2940 Mask &= maskTrailingZeros<uint64_t>(C2);
2941
2942 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2943 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2944 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2945 if (isShiftedMask_64(Mask)) {
2946 unsigned Leading = llvm::countl_zero(Mask);
2947 unsigned Trailing = llvm::countr_zero(Mask);
2948 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2949 SDLoc DL(N);
2950 EVT VT = N.getValueType();
2951 Val = SDValue(CurDAG->getMachineNode(
2952 RISCV::SLLI, DL, VT, N0.getOperand(0),
2953 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2954 0);
2955 return true;
2956 }
2957 }
2958 }
2959 }
2960
2961 return false;
2962}
2963
2964static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
2965 unsigned Bits,
2966 const TargetInstrInfo *TII) {
2967 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
2968
2969 if (!MCOpcode)
2970 return false;
2971
2972 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
2973 const uint64_t TSFlags = MCID.TSFlags;
2974 if (!RISCVII::hasSEWOp(TSFlags))
2975 return false;
2976 assert(RISCVII::hasVLOp(TSFlags));
2977
2978 bool HasGlueOp = User->getGluedNode() != nullptr;
2979 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
2980 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
2981 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
2982 unsigned VLIdx =
2983 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
2984 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
2985
2986 if (UserOpNo == VLIdx)
2987 return false;
2988
2989 auto NumDemandedBits =
2990 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
2991 return NumDemandedBits && Bits >= *NumDemandedBits;
2992}
2993
2994// Return true if all users of this SDNode* only consume the lower \p Bits.
2995// This can be used to form W instructions for add/sub/mul/shl even when the
2996// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
2997// SimplifyDemandedBits has made it so some users see a sext_inreg and some
2998// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
2999// the add/sub/mul/shl to become non-W instructions. By checking the users we
3000// may be able to use a W instruction and CSE with the other instruction if
3001// this has happened. We could try to detect that the CSE opportunity exists
3002// before doing this, but that would be more complicated.
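// For example, an i64 ADD whose only user is an SW (which reads just the low
// 32 bits of the stored value) can be selected as ADDW, allowing it to CSE
// with an existing ADDW of the same operands.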
3003bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3004 const unsigned Depth) const {
3005 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3006 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3007 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3008 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3009 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3010 isa<ConstantSDNode>(Node) || Depth != 0) &&
3011 "Unexpected opcode");
3012
3013 if (Depth >= SelectionDAG::MaxRecursionDepth)
3014 return false;
3015
3016 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3017 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3018 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3019 return false;
3020
3021 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3022 SDNode *User = *UI;
3023 // Users of this node should have already been instruction selected
3024 if (!User->isMachineOpcode())
3025 return false;
3026
3027 // TODO: Add more opcodes?
3028 switch (User->getMachineOpcode()) {
3029 default:
3030 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3031 break;
3032 return false;
3033 case RISCV::ADDW:
3034 case RISCV::ADDIW:
3035 case RISCV::SUBW:
3036 case RISCV::MULW:
3037 case RISCV::SLLW:
3038 case RISCV::SLLIW:
3039 case RISCV::SRAW:
3040 case RISCV::SRAIW:
3041 case RISCV::SRLW:
3042 case RISCV::SRLIW:
3043 case RISCV::DIVW:
3044 case RISCV::DIVUW:
3045 case RISCV::REMW:
3046 case RISCV::REMUW:
3047 case RISCV::ROLW:
3048 case RISCV::RORW:
3049 case RISCV::RORIW:
3050 case RISCV::CLZW:
3051 case RISCV::CTZW:
3052 case RISCV::CPOPW:
3053 case RISCV::SLLI_UW:
3054 case RISCV::FMV_W_X:
3055 case RISCV::FCVT_H_W:
3056 case RISCV::FCVT_H_WU:
3057 case RISCV::FCVT_S_W:
3058 case RISCV::FCVT_S_WU:
3059 case RISCV::FCVT_D_W:
3060 case RISCV::FCVT_D_WU:
3061 case RISCV::TH_REVW:
3062 case RISCV::TH_SRRIW:
3063 if (Bits < 32)
3064 return false;
3065 break;
3066 case RISCV::SLL:
3067 case RISCV::SRA:
3068 case RISCV::SRL:
3069 case RISCV::ROL:
3070 case RISCV::ROR:
3071 case RISCV::BSET:
3072 case RISCV::BCLR:
3073 case RISCV::BINV:
3074 // Shift amount operands only use log2(Xlen) bits.
3075 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
3076 return false;
3077 break;
3078 case RISCV::SLLI:
3079 // SLLI only uses the lower (XLen - ShAmt) bits.
3080 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
3081 return false;
3082 break;
3083 case RISCV::ANDI:
3084 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3085 break;
3086 goto RecCheck;
3087 case RISCV::ORI: {
3088 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3089 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3090 break;
3091 [[fallthrough]];
3092 }
3093 case RISCV::AND:
3094 case RISCV::OR:
3095 case RISCV::XOR:
3096 case RISCV::XORI:
3097 case RISCV::ANDN:
3098 case RISCV::ORN:
3099 case RISCV::XNOR:
3100 case RISCV::SH1ADD:
3101 case RISCV::SH2ADD:
3102 case RISCV::SH3ADD:
3103 RecCheck:
3104 if (hasAllNBitUsers(User, Bits, Depth + 1))
3105 break;
3106 return false;
3107 case RISCV::SRLI: {
3108 unsigned ShAmt = User->getConstantOperandVal(1);
3109 // If we are shifting right by less than Bits, and users don't demand any
3110 // bits that were shifted into [Bits-1:0], then we can consider this as an
3111 // N-Bit user.
3112 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3113 break;
3114 return false;
3115 }
3116 case RISCV::SEXT_B:
3117 case RISCV::PACKH:
3118 if (Bits < 8)
3119 return false;
3120 break;
3121 case RISCV::SEXT_H:
3122 case RISCV::FMV_H_X:
3123 case RISCV::ZEXT_H_RV32:
3124 case RISCV::ZEXT_H_RV64:
3125 case RISCV::PACKW:
3126 if (Bits < 16)
3127 return false;
3128 break;
3129 case RISCV::PACK:
3130 if (Bits < (Subtarget->getXLen() / 2))
3131 return false;
3132 break;
3133 case RISCV::ADD_UW:
3134 case RISCV::SH1ADD_UW:
3135 case RISCV::SH2ADD_UW:
3136 case RISCV::SH3ADD_UW:
3137 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3138 // 32 bits.
3139 if (UI.getOperandNo() != 0 || Bits < 32)
3140 return false;
3141 break;
3142 case RISCV::SB:
3143 if (UI.getOperandNo() != 0 || Bits < 8)
3144 return false;
3145 break;
3146 case RISCV::SH:
3147 if (UI.getOperandNo() != 0 || Bits < 16)
3148 return false;
3149 break;
3150 case RISCV::SW:
3151 if (UI.getOperandNo() != 0 || Bits < 32)
3152 return false;
3153 break;
3154 }
3155 }
3156
3157 return true;
3158}
3159
3160// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3161bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3162 SDValue &Shl2) {
3163 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3164 int64_t Offset = C->getSExtValue();
3165 int64_t Shift;
3166 for (Shift = 0; Shift < 4; Shift++)
3167 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3168 break;
3169
3170 // Constant cannot be encoded.
3171 if (Shift == 4)
3172 return false;
3173
3174 EVT Ty = N->getValueType(0);
3175 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3176 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3177 return true;
3178 }
3179
3180 return false;
3181}
3182
3183// Select VL as a 5 bit immediate or a value that will become a register. This
3184// allows us to choose between VSETIVLI or VSETVLI later.
3185bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3186 auto *C = dyn_cast<ConstantSDNode>(N);
3187 if (C && isUInt<5>(C->getZExtValue())) {
3188 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3189 N->getValueType(0));
3190 } else if (C && C->isAllOnes()) {
3191 // Treat all ones as VLMax.
3192 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3193 N->getValueType(0));
3194 } else if (isa<RegisterSDNode>(N) &&
3195 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3196 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3197 // as the register class. Convert X0 to a special immediate to pass the
3198 // MachineVerifier. This is recognized specially by the vsetvli insertion
3199 // pass.
3200 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3201 N->getValueType(0));
3202 } else {
3203 VL = N;
3204 }
3205
3206 return true;
3207}
3208
3209static SDValue findVSplat(SDValue N) {
3210 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3211 if (!N.getOperand(0).isUndef())
3212 return SDValue();
3213 N = N.getOperand(1);
3214 }
3215 SDValue Splat = N;
3216 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3217 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3218 !Splat.getOperand(0).isUndef())
3219 return SDValue();
3220 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3221 return Splat;
3222}
3223
3224bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3225 SDValue Splat = findVSplat(N);
3226 if (!Splat)
3227 return false;
3228
3229 SplatVal = Splat.getOperand(1);
3230 return true;
3231}
3232
3233static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3234 SelectionDAG &DAG,
3235 const RISCVSubtarget &Subtarget,
3236 std::function<bool(int64_t)> ValidateImm) {
3237 SDValue Splat = findVSplat(N);
3238 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3239 return false;
3240
3241 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3242 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3243 "Unexpected splat operand type");
3244
3245 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3246 // type is wider than the resulting vector element type: an implicit
3247 // truncation first takes place. Therefore, perform a manual
3248 // truncation/sign-extension in order to ignore any truncated bits and catch
3249 // any zero-extended immediate.
3250 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3251 // sign-extending to (XLenVT -1).
3252 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3253
3254 int64_t SplatImm = SplatConst.getSExtValue();
3255
3256 if (!ValidateImm(SplatImm))
3257 return false;
3258
3259 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3260 return true;
3261}
3262
3263bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3264 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3265 [](int64_t Imm) { return isInt<5>(Imm); });
3266}
3267
3268bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3269 return selectVSplatImmHelper(
3270 N, SplatVal, *CurDAG, *Subtarget,
3271 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3272}
3273
3274bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3275 SDValue &SplatVal) {
3276 return selectVSplatImmHelper(
3277 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3278 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3279 });
3280}
3281
3282bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3283 SDValue &SplatVal) {
3284 return selectVSplatImmHelper(
3285 N, SplatVal, *CurDAG, *Subtarget,
3286 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3287}
3288
3289bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3290 // Truncates are custom lowered during legalization.
3291 auto IsTrunc = [this](SDValue N) {
3292 if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
3293 return false;
3294 SDValue VL;
3295 selectVLOp(N->getOperand(2), VL);
3296 // Any vmset_vl is ok, since any bits past VL are undefined and we can
3297 // assume they are set.
3298 return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
3299 isa<ConstantSDNode>(VL) &&
3300 cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel;
3301 };
3302
3303 // We can have multiple nested truncates, so unravel them all if needed.
3304 while (N->getOpcode() == ISD::SIGN_EXTEND ||
3305 N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) {
3306 if (!N.hasOneUse() ||
3307 N.getValueType().getSizeInBits().getKnownMinValue() < 8)
3308 return false;
3309 N = N->getOperand(0);
3310 }
3311
3312 return selectVSplat(N, SplatVal);
3313}
3314
3315bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3316 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3317 if (!CFP)
3318 return false;
3319 const APFloat &APF = CFP->getValueAPF();
3320 // td can handle +0.0 already.
3321 if (APF.isPosZero())
3322 return false;
3323
3324 MVT VT = CFP->getSimpleValueType(0);
3325
3326 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3327 // the returned pair is true) we still prefer FLI + FNEG over immediate
3328 // materialization as the latter might generate a longer instruction sequence.
3329 if (static_cast<const RISCVTargetLowering *>(TLI)
3330 ->getLegalZfaFPImm(APF, VT)
3331 .first >= 0)
3332 return false;
3333
3334 MVT XLenVT = Subtarget->getXLenVT();
3335 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3336 assert(APF.isNegZero() && "Unexpected constant.");
3337 return false;
3338 }
3339 SDLoc DL(N);
3340 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3341 *Subtarget);
3342 return true;
3343}
3344
3345bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3346 SDValue &Imm) {
3347 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3348 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3349
3350 if (!isInt<5>(ImmVal))
3351 return false;
3352
3353 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3354 return true;
3355 }
3356
3357 return false;
3358}
3359
3360// Try to remove sext.w if the input is a W instruction or can be made into
3361// a W instruction cheaply.
3362bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3363 // Look for the sext.w pattern, addiw rd, rs1, 0.
3364 if (N->getMachineOpcode() != RISCV::ADDIW ||
3365 !isNullConstant(N->getOperand(1)))
3366 return false;
3367
3368 SDValue N0 = N->getOperand(0);
3369 if (!N0.isMachineOpcode())
3370 return false;
3371
3372 switch (N0.getMachineOpcode()) {
3373 default:
3374 break;
3375 case RISCV::ADD:
3376 case RISCV::ADDI:
3377 case RISCV::SUB:
3378 case RISCV::MUL:
3379 case RISCV::SLLI: {
3380 // Convert sext.w+add/sub/mul to their W instructions. This will create
3381 // a new independent instruction. This improves latency.
3382 unsigned Opc;
3383 switch (N0.getMachineOpcode()) {
3384 default:
3385 llvm_unreachable("Unexpected opcode!");
3386 case RISCV::ADD: Opc = RISCV::ADDW; break;
3387 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3388 case RISCV::SUB: Opc = RISCV::SUBW; break;
3389 case RISCV::MUL: Opc = RISCV::MULW; break;
3390 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3391 }
3392
3393 SDValue N00 = N0.getOperand(0);
3394 SDValue N01 = N0.getOperand(1);
3395
3396 // Shift amount needs to be uimm5.
3397 if (N0.getMachineOpcode() == RISCV::SLLI &&
3398 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3399 break;
3400
3401 SDNode *Result =
3402 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3403 N00, N01);
3404 ReplaceUses(N, Result);
3405 return true;
3406 }
3407 case RISCV::ADDW:
3408 case RISCV::ADDIW:
3409 case RISCV::SUBW:
3410 case RISCV::MULW:
3411 case RISCV::SLLIW:
3412 case RISCV::PACKW:
3413 case RISCV::TH_MULAW:
3414 case RISCV::TH_MULAH:
3415 case RISCV::TH_MULSW:
3416 case RISCV::TH_MULSH:
3417 if (N0.getValueType() == MVT::i32)
3418 break;
3419
3420 // The result is already sign extended; just remove the sext.w.
3421 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3422 ReplaceUses(N, N0.getNode());
3423 return true;
3424 }
3425
3426 return false;
3427}
3428
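// Sketch of the mask pattern recognized below (illustrative, not a literal
// DAG dump):
//   %ones = PseudoVMSET_M_B*                ; possibly behind COPY_TO_REGCLASS
//   %glue = CopyToReg ..., V0, %ones
//   ... masked pseudo ..., V0 /*MaskOp*/, ..., %glue /*GlueOp*/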
3429static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3430 // Check that we're using V0 as a mask register.
3431 if (!isa<RegisterSDNode>(MaskOp) ||
3432 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3433 return false;
3434
3435 // The glued user defines V0.
3436 const auto *Glued = GlueOp.getNode();
3437
3438 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3439 return false;
3440
3441 // Check that we're defining V0 as a mask register.
3442 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3443 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3444 return false;
3445
3446 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3447 SDValue MaskSetter = Glued->getOperand(2);
3448
3449 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3450 // from an extract_subvector or insert_subvector.
3451 if (MaskSetter->isMachineOpcode() &&
3452 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3453 MaskSetter = MaskSetter->getOperand(0);
3454
3455 const auto IsVMSet = [](unsigned Opc) {
3456 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3457 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3458 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3459 Opc == RISCV::PseudoVMSET_M_B8;
3460 };
3461
3462 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3463 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3464 // assume that it's all-ones? Same applies to its VL.
3465 return MaskSetter->isMachineOpcode() &&
3466 IsVMSet(MaskSetter.getMachineOpcode());
3467}
3468
3469// Return true if we can prove that the mask operand of N is an all-ones mask.
3470static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3471 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3472 N->getOperand(N->getNumOperands() - 1));
3473}
3474
3475static bool isImplicitDef(SDValue V) {
3476 return V.isMachineOpcode() &&
3477 V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3478}
3479
3480// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3481// corresponding "unmasked" pseudo versions. The mask we're interested in will
3482// take the form of a V0 physical register operand, with a glued
3483// register-setting instruction.
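// Illustrative sketch (operand names are representative, not literal):
//   %v = PseudoVADD_VV_M1_MASK %passthru, %a, %b, V0 /*all ones*/, %vl, sew, pol
// becomes
//   %v = PseudoVADD_VV_M1 %passthru, %a, %b, %vl, sew, pol
// i.e. the V0 mask operand (and the glue feeding it) is dropped and the
// unmasked opcode from the table is used instead.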
3484bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3485 const RISCV::RISCVMaskedPseudoInfo *I =
3486 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3487 if (!I)
3488 return false;
3489
3490 unsigned MaskOpIdx = I->MaskOpIdx;
3491 if (!usesAllOnesMask(N, MaskOpIdx))
3492 return false;
3493
3494 // There are two classes of pseudos in the table - compares and
3495 // everything else. See the comment on RISCVMaskedPseudo for details.
3496 const unsigned Opc = I->UnmaskedPseudo;
3497 const MCInstrDesc &MCID = TII->get(Opc);
3498 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3499#ifndef NDEBUG
3500 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3503 "Masked and unmasked pseudos are inconsistent");
3504 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3505 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3506#endif
3507
3508 SmallVector<SDValue, 8> Ops;
3509 // Skip the merge operand at index 0 if !UseTUPseudo.
3510 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3511 // Skip the mask, and the Glue.
3512 SDValue Op = N->getOperand(I);
3513 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3514 continue;
3515 Ops.push_back(Op);
3516 }
3517
3518 // Transitively apply any node glued to our new node.
3519 const auto *Glued = N->getGluedNode();
3520 if (auto *TGlued = Glued->getGluedNode())
3521 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3522
3523 MachineSDNode *Result =
3524 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3525
3526 if (!N->memoperands_empty())
3527 CurDAG->setNodeMemRefs(Result, N->memoperands());
3528
3529 Result->setFlags(N->getFlags());
3530 ReplaceUses(N, Result);
3531
3532 return true;
3533}
3534
3535static bool IsVMerge(SDNode *N) {
3536 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3537}
3538
3539static bool IsVMv(SDNode *N) {
3540 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3541}
3542
3543static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3544 switch (LMUL) {
3545 case RISCVII::LMUL_F8:
3546 return RISCV::PseudoVMSET_M_B1;
3547 case RISCVII::LMUL_F4:
3548 return RISCV::PseudoVMSET_M_B2;
3549 case RISCVII::LMUL_F2:
3550 return RISCV::PseudoVMSET_M_B4;
3551 case RISCVII::LMUL_1:
3552 return RISCV::PseudoVMSET_M_B8;
3553 case RISCVII::LMUL_2:
3554 return RISCV::PseudoVMSET_M_B16;
3555 case RISCVII::LMUL_4:
3556 return RISCV::PseudoVMSET_M_B32;
3557 case RISCVII::LMUL_8:
3558 return RISCV::PseudoVMSET_M_B64;
3559 case RISCVII::LMUL_RESERVED:
3560 llvm_unreachable("Unexpected LMUL");
3561 }
3562 llvm_unreachable("Unknown VLMUL enum");
3563}
3564
3565// Try to fold away VMERGE_VVM instructions into their true operands:
3566//
3567// %true = PseudoVADD_VV ...
3568// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3569// ->
3570// %x = PseudoVADD_VV_MASK %false, ..., %mask
3571//
3572// We can only fold if vmerge's merge operand, vmerge's false operand and
3573// %true's merge operand (if it has one) are the same. This is because we have
3574// to consolidate them into one merge operand in the result.
3575//
3576// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3577// mask is all ones.
3578//
3579// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3580// VMERGE_VVM with an all ones mask.
3581//
3582// The resulting VL is the minimum of the two VLs.
3583//
3584// The resulting policy is the effective policy the vmerge would have had,
3585 // i.e. whether or not its merge operand was implicit-def.
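// Illustrative vmv.v.v variant (a sketch; exact pseudo names vary):
//   %true = PseudoVADD_VV %passthru, %a, %b, %avl, sew, pol
//   %x    = PseudoVMV_V_V %passthru, %true, %vl
// folds to
//   %x    = PseudoVADD_VV_MASK %passthru, %a, %b, %allones, min(%avl, %vl),
//           sew, pol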
3586bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3587 SDValue Merge, False, True, VL, Mask, Glue;
3588 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3589 if (IsVMv(N)) {
3590 Merge = N->getOperand(0);
3591 False = N->getOperand(0);
3592 True = N->getOperand(1);
3593 VL = N->getOperand(2);
3594 // A vmv.v.v won't have a Mask or Glue; instead we'll construct an all-ones
3595 // mask later below.
3596 } else {
3597 assert(IsVMerge(N));
3598 Merge = N->getOperand(0);
3599 False = N->getOperand(1);
3600 True = N->getOperand(2);
3601 Mask = N->getOperand(3);
3602 VL = N->getOperand(4);
3603 // We always have a glue node for the mask at v0.
3604 Glue = N->getOperand(N->getNumOperands() - 1);
3605 }
3606 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3607 assert(!Glue || Glue.getValueType() == MVT::Glue);
3608
3609 // We require that either merge and false are the same, or that merge
3610 // is undefined.
3611 if (Merge != False && !isImplicitDef(Merge))
3612 return false;
3613
3614 assert(True.getResNo() == 0 &&
3615 "Expect True is the first output of an instruction.");
3616
3617 // N must be the only user of True.
3618 if (!True.hasOneUse())
3619 return false;
3620
3621 if (!True.isMachineOpcode())
3622 return false;
3623
3624 unsigned TrueOpc = True.getMachineOpcode();
3625 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3626 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3627 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3628
3629 bool IsMasked = false;
3630 const RISCV::RISCVMaskedPseudoInfo *Info =
3631 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3632 if (!Info && HasTiedDest) {
3633 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3634 IsMasked = true;
3635 }
3636
3637 if (!Info)
3638 return false;
3639
3640 // When the mask is not all ones, this transformation is illegal for some
3641 // operations whose results are affected by the mask, like viota.m.
3642 if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
3643 return false;
3644
3645 // If True has a merge operand then it needs to be the same as vmerge's False,
3646 // since False will be used for the result's merge operand.
3647 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3648 // The vmerge instruction must be TU.
3649 // FIXME: This could be relaxed, but we need to handle the policy for the
3650 // resulting op correctly.
3651 if (isImplicitDef(Merge))
3652 return false;
3653 SDValue MergeOpTrue = True->getOperand(0);
3654 if (False != MergeOpTrue)
3655 return false;
3656 }
3657
3658 // If True is masked then the vmerge must have an all 1s mask, since we're
3659 // going to keep the mask from True.
3660 if (IsMasked) {
3661 assert(HasTiedDest && "Expected tied dest");
3662 // The vmerge instruction must be TU.
3663 if (isImplicitDef(Merge))
3664 return false;
3665 // FIXME: Support mask agnostic True instruction which would have an
3666 // undef merge operand.
3667 if (Mask && !usesAllOnesMask(Mask, Glue))
3668 return false;
3669 }
3670
3671 // Skip if True has side effects.
3672 // TODO: Support vleff and vlsegff.
3673 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3674 return false;
3675
3676 // The last operand of a masked instruction may be glued.
3677 bool HasGlueOp = True->getGluedNode() != nullptr;
3678
3679 // The chain operand may exist either before the glued operands or in the last
3680 // position.
3681 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3682 bool HasChainOp =
3683 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3684
3685 if (HasChainOp) {
3686 // Avoid creating cycles in the DAG. We must ensure that none of the other
3687 // operands depend on True through its chain.
3688 SmallVector<const SDNode *, 4> LoopWorklist;
3689 SmallPtrSet<const SDNode *, 16> Visited;
3690 LoopWorklist.push_back(False.getNode());
3691 if (Mask)
3692 LoopWorklist.push_back(Mask.getNode());
3693 LoopWorklist.push_back(VL.getNode());
3694 if (Glue)
3695 LoopWorklist.push_back(Glue.getNode());
3696 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3697 return false;
3698 }
3699
3700 // The vector policy operand may be present for masked intrinsics
3701 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3702 unsigned TrueVLIndex =
3703 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3704 SDValue TrueVL = True.getOperand(TrueVLIndex);
3705 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3706
3707 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3708 if (LHS == RHS)
3709 return LHS;
3710 if (isAllOnesConstant(LHS))
3711 return RHS;
3712 if (isAllOnesConstant(RHS))
3713 return LHS;
3714 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3715 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3716 if (!CLHS || !CRHS)
3717 return SDValue();
3718 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3719 };
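// For example (illustrative): GetMinVL(VLMAX, 4) == 4, GetMinVL(8, 8) == 8,
// and GetMinVL of a non-constant VL against a different constant returns an
// empty SDValue, which aborts the fold below.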
3720
3721 // Because N and True must have the same merge operand (or True's operand is
3722 // implicit_def), the "effective" body is the minimum of their VLs.
3723 SDValue OrigVL = VL;
3724 VL = GetMinVL(TrueVL, VL);
3725 if (!VL)
3726 return false;
3727
3728 // If we end up changing the VL or mask of True, then we need to make sure it
3729 // doesn't raise any observable fp exceptions, since changing the active
3730 // elements will affect how fflags is set.
3731 if (TrueVL != VL || !IsMasked)
3732 if (mayRaiseFPException(True.getNode()) &&
3733 !True->getFlags().hasNoFPExcept())
3734 return false;
3735
3736 SDLoc DL(N);
3737
3738 // From the preconditions we checked above, we know the mask and thus glue
3739 // for the result node will be taken from True.
3740 if (IsMasked) {
3741 Mask = True->getOperand(Info->MaskOpIdx);
3742 Glue = True->getOperand(True->getNumOperands() - 1);
3743 assert(Glue.getValueType() == MVT::Glue);
3744 }
3745 // Otherwise, if we would be using the vmerge's mask but the vmerge is actually
3746 // a vmv.v.v (which has no mask), create an all-ones mask to use.
3747 else if (IsVMv(N)) {
3748 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3749 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3750 ElementCount EC = N->getValueType(0).getVectorElementCount();
3751 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3752
3753 SDValue AllOnesMask =
3754 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3755 SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3756 RISCV::V0, AllOnesMask, SDValue());
3757 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3758 Glue = MaskCopy.getValue(1);
3759 }
3760
3761 unsigned MaskedOpc = Info->MaskedPseudo;
3762#ifndef NDEBUG
3763 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3765 "Expected instructions with mask have policy operand.");
3766 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3767 MCOI::TIED_TO) == 0 &&
3768 "Expected instructions with mask have a tied dest.");
3769#endif
3770
3771 // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3772 // operand is undefined.
3773 //
3774 // However, if the VL became smaller than what the vmerge had originally, then
3775 // elements past VL that were previously in the vmerge's body will have moved
3776 // to the tail. In that case we always need to use tail undisturbed to
3777 // preserve them.
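// E.g. (illustrative) an IMPLICIT_DEF merge with an unchanged VL gives
// TAIL_AGNOSTIC; a real merge value, or a VL that shrank, keeps the default
// tail-undisturbed (TUMU) policy so the old elements are preserved.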
3778 bool MergeVLShrunk = VL != OrigVL;
3779 uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3780 ? RISCVII::TAIL_AGNOSTIC
3781 : /*TUMU*/ 0;
3782 SDValue PolicyOp =
3783 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3784
3785
3786 SmallVector<SDValue, 8> Ops;
3787 Ops.push_back(False);
3788
3789 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3790 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3791 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3792 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3793
3794 Ops.push_back(Mask);
3795
3796 // For unmasked "VOp" with rounding mode operand, that is interfaces like
3797 // (..., rm, vl) or (..., rm, vl, policy).
3798 // Its masked version is (..., vm, rm, vl, policy).
3799 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
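// Illustrative operand layout (a sketch): an unmasked FP pseudo with operands
//   (passthru, rs2, rs1, frm, vl, sew, policy)
// is rebuilt below as the masked form
//   (false, rs2, rs1, mask, frm, vl, sew, policy)
// so the rounding-mode operand is re-appended right before VL.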
3800 if (HasRoundingMode)
3801 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3802
3803 Ops.append({VL, SEW, PolicyOp});
3804
3805 // Result node should have chain operand of True.
3806 if (HasChainOp)
3807 Ops.push_back(True.getOperand(TrueChainOpIdx));
3808
3809 // Add the glue for the CopyToReg of mask->v0.
3810 Ops.push_back(Glue);
3811
3812 MachineSDNode *Result =
3813 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3814 Result->setFlags(True->getFlags());
3815
3816 if (!cast<MachineSDNode>(True)->memoperands_empty())
3817 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3818
3819 // Replace vmerge.vvm node by Result.
3820 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3821
3822 // Replace the other result values of True, e.g. its chain and VL outputs.
3823 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3824 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3825
3826 return true;
3827}
3828
3829bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3830 bool MadeChange = false;
3831 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3832
3833 while (Position != CurDAG->allnodes_begin()) {
3834 SDNode *N = &*--Position;
3835 if (N->use_empty() || !N->isMachineOpcode())
3836 continue;
3837
3838 if (IsVMerge(N) || IsVMv(N))
3839 MadeChange |= performCombineVMergeAndVOps(N);
3840 }
3841 return MadeChange;
3842}
3843
3844/// If our passthru is an implicit_def, use noreg instead. This side-steps
3845/// issues with MachineCSE not being able to CSE expressions with
3846/// IMPLICIT_DEF operands while preserving the semantic intent. See
3847/// pr64282 for context. Note that this transform is the last one
3848/// performed during DAG-to-DAG instruction selection.
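/// Illustrative sketch (operand names are representative):
///   %v = PseudoVADD_VV_M1 IMPLICIT_DEF, %a, %b, %vl, sew, policy
/// becomes
///   %v = PseudoVADD_VV_M1 $noreg, %a, %b, %vl, sew, policy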
3849bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3850 bool MadeChange = false;
3851 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3852
3853 while (Position != CurDAG->allnodes_begin()) {
3854 SDNode *N = &*--Position;
3855 if (N->use_empty() || !N->isMachineOpcode())
3856 continue;
3857
3858 const unsigned Opc = N->getMachineOpcode();
3859 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3860 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3861 !isImplicitDef(N->getOperand(0)))
3862 continue;
3863
3864 SmallVector<SDValue> Ops;
3865 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3866 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3867 SDValue Op = N->getOperand(I);
3868 Ops.push_back(Op);
3869 }
3870
3871 MachineSDNode *Result =
3872 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3873 Result->setFlags(N->getFlags());
3874 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3875 ReplaceUses(N, Result);
3876 MadeChange = true;
3877 }
3878 return MadeChange;
3879}
3880
3881
3882// This pass converts a legalized DAG into a RISC-V-specific DAG, ready
3883// for instruction scheduling.
3884FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3885 CodeGenOptLevel OptLevel) {
3886 return new RISCVDAGToDAGISel(TM, OptLevel);
3887}
3888
3889char RISCVDAGToDAGISel::ID = 0;
3890