RISCVISelDAGToDAG.cpp
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
42void RISCVDAGToDAGISel::PreprocessISelDAG() {
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
55 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
56 MVT VT = N->getSimpleValueType(0);
57 unsigned Opc =
58 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
59 SDLoc DL(N);
60 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
61 SDValue Src = N->getOperand(0);
62 if (VT.isInteger())
63 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
64 N->getOperand(0));
65 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
66 break;
67 }
68 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
70 // load. Done after lowering and combining so that we have a chance to
71 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
72 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
73 MVT VT = N->getSimpleValueType(0);
74 SDValue Passthru = N->getOperand(0);
75 SDValue Lo = N->getOperand(1);
76 SDValue Hi = N->getOperand(2);
77 SDValue VL = N->getOperand(3);
78 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
79 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
80 "Unexpected VTs!");
81 MachineFunction &MF = CurDAG->getMachineFunction();
82 SDLoc DL(N);
83
84 // Create temporary stack for each expanding node.
85 SDValue StackSlot =
86 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
87 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
88 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
89
90 SDValue Chain = CurDAG->getEntryNode();
91 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
92
93 SDValue OffsetSlot =
94 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
95 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
96 Align(8));
97
98 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
99
100 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
101 SDValue IntID =
102 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
103 SDValue Ops[] = {Chain,
104 IntID,
105 Passthru,
106 StackSlot,
107 CurDAG->getRegister(RISCV::X0, MVT::i64),
108 VL};
109
110 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
111 MVT::i64, MPI, Align(8),
112 MachineMemOperand::MOLoad);
113 break;
114 }
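// Illustrative shape of the expansion above (not part of the source): the i64
// splat value is spilled as two 32-bit stores into an 8-byte stack slot and
// reloaded with a riscv_vlse whose stride register is X0 (stride 0), so every
// element reads the same 8 bytes, roughly:
//   sw lo, 0(slot); sw hi, 4(slot); vlse64.v vd, (slot), x0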
115 case ISD::FP_EXTEND: {
116 // We only have vector patterns for riscv_fpextend_vl in isel.
117 SDLoc DL(N);
118 MVT VT = N->getSimpleValueType(0);
119 if (!VT.isVector())
120 break;
121 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
122 SDValue TrueMask = CurDAG->getNode(
123 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
124 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
125 TrueMask, VLMAX);
126 break;
127 }
128 }
129
130 if (Result) {
131 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
132 LLVM_DEBUG(N->dump(CurDAG));
133 LLVM_DEBUG(dbgs() << "\nNew: ");
134 LLVM_DEBUG(Result->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\n");
136
137 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
138 MadeChange = true;
139 }
140 }
141
142 if (MadeChange)
143 CurDAG->RemoveDeadNodes();
144}
145
146void RISCVDAGToDAGISel::PostprocessISelDAG() {
147 HandleSDNode Dummy(CurDAG->getRoot());
148 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
149
150 bool MadeChange = false;
151 while (Position != CurDAG->allnodes_begin()) {
152 SDNode *N = &*--Position;
153 // Skip dead nodes and any non-machine opcodes.
154 if (N->use_empty() || !N->isMachineOpcode())
155 continue;
156
157 MadeChange |= doPeepholeSExtW(N);
158
159 // FIXME: This is here only because the VMerge transform doesn't
160 // know how to handle masked true inputs. Once that has been moved
161 // to post-ISEL, this can be deleted as well.
162 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
163 }
164
165 CurDAG->setRoot(Dummy.getValue());
166
167 // After we're done with everything else, convert IMPLICIT_DEF
168 // passthru operands to NoRegister. This is required to work around
169 // an optimization deficiency in MachineCSE. This really should
170 // be merged back into each of the patterns (i.e. there's no good
171 // reason not to go directly to NoReg), but is being done this way
172 // to allow easy backporting.
173 MadeChange |= doPeepholeNoRegPassThru();
174
175 if (MadeChange)
176 CurDAG->RemoveDeadNodes();
177}
178
179static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
180 RISCVMatInt::InstSeq &Seq) {
181 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
182 for (const RISCVMatInt::Inst &Inst : Seq) {
183 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
184 SDNode *Result = nullptr;
185 switch (Inst.getOpndKind()) {
186 case RISCVMatInt::Imm:
187 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
188 break;
189 case RISCVMatInt::RegX0:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
191 CurDAG->getRegister(RISCV::X0, VT));
192 break;
193 case RISCVMatInt::RegReg:
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
195 break;
196 case RISCVMatInt::RegImm:
197 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
198 break;
199 }
200
201 // Only the first instruction has X0 as its source.
202 SrcReg = SDValue(Result, 0);
203 }
204
205 return SrcReg;
206}
207
208static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
209 int64_t Imm, const RISCVSubtarget &Subtarget) {
210 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
211
212 // Use a rematerializable pseudo instruction for short sequences if enabled.
213 if (Seq.size() == 2 && UsePseudoMovImm)
214 return SDValue(
215 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
216 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
217 0);
218
219 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
220 // worst an LUI+ADDIW. This will require an extra register, but avoids a
221 // constant pool.
222 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
223 // low and high 32 bits are the same and bits 31 and 63 are set.
224 if (Seq.size() > 3) {
225 unsigned ShiftAmt, AddOpc;
226 RISCVMatInt::InstSeq SeqLo =
227 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
228 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
229 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
230
231 SDValue SLLI = SDValue(
232 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
233 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
234 0);
235 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
236 }
237 }
238
239 // Otherwise, use the original sequence.
240 return selectImmSeq(CurDAG, DL, VT, Seq);
241}
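// Illustrative example of the two-register path above (not from the source):
// for Imm = 0x0001234500012345 the low 32 bits equal Imm >> 32, so
// generateTwoRegInstSeq can describe Imm as Lo + (Lo << 32) with Lo = 0x12345.
// Lo costs LUI+ADDI, and the final value is formed with SLLI 32 + ADD, i.e.
// four instructions plus one extra register instead of a longer
// single-register sequence.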
242
243void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
244 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
245 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
246 bool IsLoad, MVT *IndexVT) {
247 SDValue Chain = Node->getOperand(0);
248
249 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
250
251 if (IsStridedOrIndexed) {
252 Operands.push_back(Node->getOperand(CurOp++)); // Index.
253 if (IndexVT)
254 *IndexVT = Operands.back()->getSimpleValueType(0);
255 }
256
257 if (IsMasked) {
258 SDValue Mask = Node->getOperand(CurOp++);
259 Operands.push_back(Mask);
260 }
261 SDValue VL;
262 selectVLOp(Node->getOperand(CurOp++), VL);
263 Operands.push_back(VL);
264
265 MVT XLenVT = Subtarget->getXLenVT();
266 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
267 Operands.push_back(SEWOp);
268
269 // At the IR layer, all the masked load intrinsics have policy operands,
270 // none of the others do. All have passthru operands. For our pseudos,
271 // all loads have policy operands.
272 if (IsLoad) {
273 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
274 if (IsMasked)
275 Policy = Node->getConstantOperandVal(CurOp++);
276 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
277 Operands.push_back(PolicyOp);
278 }
279
280 Operands.push_back(Chain); // Chain.
281}
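// For reference, the operand order appended by this helper is:
//   base pointer [, stride/index] [, mask], VL, SEW (log2) [, policy], chain
// which is the operand layout the RVV load/store pseudoinstructions expect
// after any passthru operand pushed by the caller.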
282
283void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
284 bool IsStrided) {
285 SDLoc DL(Node);
286 MVT VT = Node->getSimpleValueType(0);
287 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
288 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
289
290 unsigned CurOp = 2;
291 SmallVector<SDValue, 8> Operands;
292
293 Operands.push_back(Node->getOperand(CurOp++));
294
295 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
296 Operands, /*IsLoad=*/true);
297
298 const RISCV::VLSEGPseudo *P =
299 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
300 static_cast<unsigned>(LMUL));
301 MachineSDNode *Load =
302 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
303
304 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
305
306 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
307 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
308 CurDAG->RemoveDeadNode(Node);
309}
310
311void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
312 bool IsMasked) {
313 SDLoc DL(Node);
314 MVT VT = Node->getSimpleValueType(0);
315 MVT XLenVT = Subtarget->getXLenVT();
316 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
317 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
318
319 unsigned CurOp = 2;
320 SmallVector<SDValue, 8> Operands;
321
322 Operands.push_back(Node->getOperand(CurOp++));
323
324 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
325 /*IsStridedOrIndexed*/ false, Operands,
326 /*IsLoad=*/true);
327
328 const RISCV::VLSEGPseudo *P =
329 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
330 Log2SEW, static_cast<unsigned>(LMUL));
331 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
332 XLenVT, MVT::Other, Operands);
333
334 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
335
336 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
337 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
338 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
339 CurDAG->RemoveDeadNode(Node);
340}
341
342void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
343 bool IsOrdered) {
344 SDLoc DL(Node);
345 MVT VT = Node->getSimpleValueType(0);
346 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
347 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
348
349 unsigned CurOp = 2;
350 SmallVector<SDValue, 8> Operands;
351
352 Operands.push_back(Node->getOperand(CurOp++));
353
354 MVT IndexVT;
355 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
356 /*IsStridedOrIndexed*/ true, Operands,
357 /*IsLoad=*/true, &IndexVT);
358
359#ifndef NDEBUG
360 // Number of elements = RVVBitsPerBlock * LMUL / SEW
361 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
362 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
363 if (DecodedLMUL.second)
364 ContainedTyNumElts /= DecodedLMUL.first;
365 else
366 ContainedTyNumElts *= DecodedLMUL.first;
367 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
368 "Element count mismatch");
369#endif
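// Example of the assertion above (illustrative): with Log2SEW = 5 (SEW = 32)
// and LMUL = 2, the data type has RVVBitsPerBlock/32 * 2 = 4 elements
// (minimum, i.e. per vscale), so the index vector type must also have a
// minimum of 4 elements.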
370
371 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
372 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
373 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
374 reportFatalUsageError("The V extension does not support EEW=64 for index "
375 "values when XLEN=32");
376 }
377 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
378 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
379 static_cast<unsigned>(IndexLMUL));
380 MachineSDNode *Load =
381 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
382
383 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
384
385 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
386 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
387 CurDAG->RemoveDeadNode(Node);
388}
389
390void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
391 bool IsStrided) {
392 SDLoc DL(Node);
393 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
394 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
395 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
396
397 unsigned CurOp = 2;
398 SmallVector<SDValue, 8> Operands;
399
400 Operands.push_back(Node->getOperand(CurOp++));
401
402 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
403 Operands);
404
405 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
406 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
407 MachineSDNode *Store =
408 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
409
410 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
411
412 ReplaceNode(Node, Store);
413}
414
415void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
416 bool IsOrdered) {
417 SDLoc DL(Node);
418 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
419 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
420 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
421
422 unsigned CurOp = 2;
423 SmallVector<SDValue, 8> Operands;
424
425 Operands.push_back(Node->getOperand(CurOp++));
426
427 MVT IndexVT;
428 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
429 /*IsStridedOrIndexed*/ true, Operands,
430 /*IsLoad=*/false, &IndexVT);
431
432#ifndef NDEBUG
434 // Number of elements = RVVBitsPerBlock * LMUL / SEW
434 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
435 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
436 if (DecodedLMUL.second)
437 ContainedTyNumElts /= DecodedLMUL.first;
438 else
439 ContainedTyNumElts *= DecodedLMUL.first;
440 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
441 "Element count mismatch");
442#endif
443
444 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
445 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
446 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
447 reportFatalUsageError("The V extension does not support EEW=64 for index "
448 "values when XLEN=32");
449 }
450 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
451 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
452 static_cast<unsigned>(IndexLMUL));
453 MachineSDNode *Store =
454 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
455
456 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
457
458 ReplaceNode(Node, Store);
459}
460
461void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
462 if (!Subtarget->hasVInstructions())
463 return;
464
465 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
466
467 SDLoc DL(Node);
468 MVT XLenVT = Subtarget->getXLenVT();
469
470 unsigned IntNo = Node->getConstantOperandVal(0);
471
472 assert((IntNo == Intrinsic::riscv_vsetvli ||
473 IntNo == Intrinsic::riscv_vsetvlimax) &&
474 "Unexpected vsetvli intrinsic");
475
476 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
477 unsigned Offset = (VLMax ? 1 : 2);
478
479 assert(Node->getNumOperands() == Offset + 2 &&
480 "Unexpected number of operands");
481
482 unsigned SEW =
483 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
484 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
485 Node->getConstantOperandVal(Offset + 1) & 0x7);
486
487 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
488 /*MaskAgnostic*/ true);
489 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
490
491 SDValue VLOperand;
492 unsigned Opcode = RISCV::PseudoVSETVLI;
493 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
494 if (auto VLEN = Subtarget->getRealVLen())
495 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
496 VLMax = true;
497 }
498 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
499 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
500 Opcode = RISCV::PseudoVSETVLIX0;
501 } else {
502 VLOperand = Node->getOperand(1);
503
504 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
505 uint64_t AVL = C->getZExtValue();
506 if (isUInt<5>(AVL)) {
507 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
508 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
509 XLenVT, VLImm, VTypeIOp));
510 return;
511 }
512 }
513 }
514
516 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
517}
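// Illustrative selections for the logic above, assuming SEW = 32, LMUL = 1:
//   constant AVL of 4 (a uimm5, not known to be VLMAX) -> PseudoVSETIVLI 4, vtype
//   AVL equal to VLMAX or all-ones                     -> PseudoVSETVLIX0 x0, vtype
//   any other scalar AVL in a register                 -> PseudoVSETVLI reg, vtype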
518
520 if (!Subtarget->hasVendorXSfmmbase())
521 return;
522
523 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
524
525 SDLoc DL(Node);
526 MVT XLenVT = Subtarget->getXLenVT();
527
528 unsigned IntNo = Node->getConstantOperandVal(0);
529
530 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
531 IntNo == Intrinsic::riscv_sf_vsettm ||
532 IntNo == Intrinsic::riscv_sf_vsettk) &&
533 "Unexpected XSfmm vset intrinsic");
534
535 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
536 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
537 unsigned PseudoOpCode =
538 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
539 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
540 : RISCV::PseudoSF_VSETTK;
541
542 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
543 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
544 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
545
546 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
547 Node->getOperand(1), VTypeIOp));
548 } else {
549 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
550 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
551 ReplaceNode(Node,
552 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
553 Node->getOperand(1), Log2SEW, TWiden));
554 }
555}
556
557bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
558 MVT VT = Node->getSimpleValueType(0);
559 unsigned Opcode = Node->getOpcode();
560 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
561 "Unexpected opcode");
562 SDLoc DL(Node);
563
564 // For operations of the form (x << C1) op C2, check if we can use
565 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
566 SDValue N0 = Node->getOperand(0);
567 SDValue N1 = Node->getOperand(1);
568
569 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
570 if (!Cst)
571 return false;
572
573 int64_t Val = Cst->getSExtValue();
574
575 // Check if immediate can already use ANDI/ORI/XORI.
576 if (isInt<12>(Val))
577 return false;
578
579 SDValue Shift = N0;
580
581 // If Val is simm32 and we have a sext_inreg from i32, then the binop
582 // produces at least 33 sign bits. We can peek through the sext_inreg and use
583 // a SLLIW at the end.
584 bool SignExt = false;
585 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
586 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
587 SignExt = true;
588 Shift = N0.getOperand(0);
589 }
590
591 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
592 return false;
593
594 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
595 if (!ShlCst)
596 return false;
597
598 uint64_t ShAmt = ShlCst->getZExtValue();
599
600 // Make sure that we don't change the operation by removing bits.
601 // This only matters for OR and XOR, AND is unaffected.
602 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
603 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
604 return false;
605
606 int64_t ShiftedVal = Val >> ShAmt;
607 if (!isInt<12>(ShiftedVal))
608 return false;
609
610 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
611 if (SignExt && ShAmt >= 32)
612 return false;
613
614 // Ok, we can reorder to get a smaller immediate.
615 unsigned BinOpc;
616 switch (Opcode) {
617 default: llvm_unreachable("Unexpected opcode");
618 case ISD::AND: BinOpc = RISCV::ANDI; break;
619 case ISD::OR: BinOpc = RISCV::ORI; break;
620 case ISD::XOR: BinOpc = RISCV::XORI; break;
621 }
622
623 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
624
625 SDNode *BinOp = CurDAG->getMachineNode(
626 BinOpc, DL, VT, Shift.getOperand(0),
627 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
628 SDNode *SLLI =
629 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
630 CurDAG->getTargetConstant(ShAmt, DL, VT));
631 ReplaceNode(Node, SLLI);
632 return true;
633}
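// Worked example (illustrative): (and (shl x, 8), 0xff00). 0xff00 is not a
// simm12, but shifting it right by the shl amount gives 0xff, which is, so
// the code above emits (slli (andi x, 0xff), 8) and avoids materializing
// 0xff00 in a register.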
634
635bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
636 unsigned Opc;
637
638 if (Subtarget->hasVendorXTHeadBb())
639 Opc = RISCV::TH_EXT;
640 else if (Subtarget->hasVendorXAndesPerf())
641 Opc = RISCV::NDS_BFOS;
642 else if (Subtarget->hasVendorXqcibm())
643 Opc = RISCV::QC_EXT;
644 else
645 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
646 return false;
647
648 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
649 if (!N1C)
650 return false;
651
652 SDValue N0 = Node->getOperand(0);
653 if (!N0.hasOneUse())
654 return false;
655
656 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
657 const SDLoc &DL, MVT VT) {
658 if (Opc == RISCV::QC_EXT) {
659 // QC.EXT X, width, shamt
660 // shamt is the same as Lsb
661 // width is the number of bits to extract from the Lsb
662 Msb = Msb - Lsb + 1;
663 }
664 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
665 CurDAG->getTargetConstant(Msb, DL, VT),
666 CurDAG->getTargetConstant(Lsb, DL, VT));
667 };
668
669 SDLoc DL(Node);
670 MVT VT = Node->getSimpleValueType(0);
671 const unsigned RightShAmt = N1C->getZExtValue();
672
673 // Transform (sra (shl X, C1) C2) with C1 < C2
674 // -> (SignedBitfieldExtract X, msb, lsb)
675 if (N0.getOpcode() == ISD::SHL) {
676 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
677 if (!N01C)
678 return false;
679
680 const unsigned LeftShAmt = N01C->getZExtValue();
681 // Make sure that this is a bitfield extraction (i.e., the shift-right
682 // amount can not be less than the left-shift).
683 if (LeftShAmt > RightShAmt)
684 return false;
685
686 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
687 const unsigned Msb = MsbPlusOne - 1;
688 const unsigned Lsb = RightShAmt - LeftShAmt;
689
690 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
691 ReplaceNode(Node, Sbe);
692 return true;
693 }
694
695 // Transform (sra (sext_inreg X, _), C) ->
696 // (SignedBitfieldExtract X, msb, lsb)
697 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
698 unsigned ExtSize =
699 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
700
701 // ExtSize of 32 should use sraiw via tablegen pattern.
702 if (ExtSize == 32)
703 return false;
704
705 const unsigned Msb = ExtSize - 1;
706 // If the shift-right amount is greater than Msb, the operation extracts
707 // just the X[Msb] bit and sign-extends it.
708 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
709
710 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
711 ReplaceNode(Node, Sbe);
712 return true;
713 }
714
715 return false;
716}
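// Worked example (illustrative, XLen = 32): (sra (shl X, 20), 24) extracts
// bits [11:4] of X and sign-extends them: Msb = 32-20-1 = 11 and
// Lsb = 24-20 = 4, giving TH.EXT X, 11, 4, or QC.EXT X, 8, 4 since QC.EXT
// takes a width of Msb-Lsb+1 = 8 instead of an Msb.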
717
718bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
719 // Only supported with XAndesPerf at the moment.
720 if (!Subtarget->hasVendorXAndesPerf())
721 return false;
722
723 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
724 if (!N1C)
725 return false;
726
727 SDValue N0 = Node->getOperand(0);
728 if (!N0.hasOneUse())
729 return false;
730
731 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
732 const SDLoc &DL, MVT VT) {
733 unsigned Opc = RISCV::NDS_BFOS;
734 // If the Lsb is equal to the Msb, then the Lsb should be 0.
735 if (Lsb == Msb)
736 Lsb = 0;
737 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
738 CurDAG->getTargetConstant(Lsb, DL, VT),
739 CurDAG->getTargetConstant(Msb, DL, VT));
740 };
741
742 SDLoc DL(Node);
743 MVT VT = Node->getSimpleValueType(0);
744 const unsigned RightShAmt = N1C->getZExtValue();
745
746 // Transform (sra (shl X, C1) C2) with C1 > C2
747 // -> (NDS.BFOS X, lsb, msb)
748 if (N0.getOpcode() == ISD::SHL) {
749 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
750 if (!N01C)
751 return false;
752
753 const unsigned LeftShAmt = N01C->getZExtValue();
754 // Make sure that this is a bitfield insertion (i.e., the shift-right
755 // amount should be less than the left-shift).
756 if (LeftShAmt <= RightShAmt)
757 return false;
758
759 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
760 const unsigned Msb = MsbPlusOne - 1;
761 const unsigned Lsb = LeftShAmt - RightShAmt;
762
763 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
764 ReplaceNode(Node, Sbi);
765 return true;
766 }
767
768 return false;
769}
770
771bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
772 const SDLoc &DL, MVT VT,
773 SDValue X, unsigned Msb,
774 unsigned Lsb) {
775 unsigned Opc;
776
777 if (Subtarget->hasVendorXTHeadBb()) {
778 Opc = RISCV::TH_EXTU;
779 } else if (Subtarget->hasVendorXAndesPerf()) {
780 Opc = RISCV::NDS_BFOZ;
781 } else if (Subtarget->hasVendorXqcibm()) {
782 Opc = RISCV::QC_EXTU;
783 // QC.EXTU X, width, shamt
784 // shamt is the same as Lsb
785 // width is the number of bits to extract from the Lsb
786 Msb = Msb - Lsb + 1;
787 } else {
788 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
789 return false;
790 }
791
792 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
793 CurDAG->getTargetConstant(Msb, DL, VT),
794 CurDAG->getTargetConstant(Lsb, DL, VT));
795 ReplaceNode(Node, Ube);
796 return true;
797}
798
799bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
800 const SDLoc &DL, MVT VT,
801 SDValue X, unsigned Msb,
802 unsigned Lsb) {
803 // Only supported with XAndesPerf at the moment.
804 if (!Subtarget->hasVendorXAndesPerf())
805 return false;
806
807 unsigned Opc = RISCV::NDS_BFOZ;
808
809 // If the Lsb is equal to the Msb, then the Lsb should be 0.
810 if (Lsb == Msb)
811 Lsb = 0;
812 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
813 CurDAG->getTargetConstant(Lsb, DL, VT),
814 CurDAG->getTargetConstant(Msb, DL, VT));
815 ReplaceNode(Node, Ubi);
816 return true;
817}
818
819bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
820 // Target does not support indexed loads.
821 if (!Subtarget->hasVendorXTHeadMemIdx())
822 return false;
823
824 LoadSDNode *Ld = cast<LoadSDNode>(Node);
825 ISD::MemIndexedMode AM = Ld->getAddressingMode();
826 if (AM == ISD::UNINDEXED)
827 return false;
828
829 auto *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
830 if (!C)
831 return false;
832
833 EVT LoadVT = Ld->getMemoryVT();
834 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
835 "Unexpected addressing mode");
836 bool IsPre = AM == ISD::PRE_INC;
837 bool IsPost = AM == ISD::POST_INC;
838 int64_t Offset = C->getSExtValue();
839
840 // The constants that can be encoded in the THeadMemIdx instructions
841 // are of the form (sign_extend(imm5) << imm2).
842 unsigned Shift;
843 for (Shift = 0; Shift < 4; Shift++)
844 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
845 break;
846
847 // Constant cannot be encoded.
848 if (Shift == 4)
849 return false;
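// Example of the encoding above (illustrative): an offset of 48 is not a
// simm5, but 48 >> 2 == 12 is, and 48 is divisible by 4, so the loop settles
// on Shift = 2 and the offset is encoded as 12 << 2.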
850
851 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
852 unsigned Opcode;
853 if (LoadVT == MVT::i8 && IsPre)
854 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
855 else if (LoadVT == MVT::i8 && IsPost)
856 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
857 else if (LoadVT == MVT::i16 && IsPre)
858 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
859 else if (LoadVT == MVT::i16 && IsPost)
860 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
861 else if (LoadVT == MVT::i32 && IsPre)
862 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
863 else if (LoadVT == MVT::i32 && IsPost)
864 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
865 else if (LoadVT == MVT::i64 && IsPre)
866 Opcode = RISCV::TH_LDIB;
867 else if (LoadVT == MVT::i64 && IsPost)
868 Opcode = RISCV::TH_LDIA;
869 else
870 return false;
871
872 EVT Ty = Ld->getOffset().getValueType();
873 SDValue Ops[] = {
874 Ld->getBasePtr(),
875 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
876 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
877 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
878 Ld->getValueType(1), MVT::Other, Ops);
879
880 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
881 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
882
883 ReplaceNode(Node, New);
884
885 return true;
886}
887
888static Register getTileReg(uint64_t TileNum) {
889 assert(TileNum <= 15 && "Invalid tile number");
890 return RISCV::T0 + TileNum;
891}
892
893void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
894 if (!Subtarget->hasVInstructions())
895 return;
896
897 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
898
899 SDLoc DL(Node);
900 unsigned IntNo = Node->getConstantOperandVal(1);
901
902 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
903 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
904 "Unexpected vsetvli intrinsic");
905
906 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
907 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
908 SDValue SEWOp =
909 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
910 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
911 Node->getOperand(4), Node->getOperand(5),
912 Node->getOperand(8), SEWOp,
913 Node->getOperand(0)};
914
915 unsigned Opcode;
916 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
917 switch (LMulSDNode->getSExtValue()) {
918 case 5:
919 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
920 : RISCV::PseudoSF_VC_I_SE_MF8;
921 break;
922 case 6:
923 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
924 : RISCV::PseudoSF_VC_I_SE_MF4;
925 break;
926 case 7:
927 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
928 : RISCV::PseudoSF_VC_I_SE_MF2;
929 break;
930 case 0:
931 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
932 : RISCV::PseudoSF_VC_I_SE_M1;
933 break;
934 case 1:
935 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
936 : RISCV::PseudoSF_VC_I_SE_M2;
937 break;
938 case 2:
939 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
940 : RISCV::PseudoSF_VC_I_SE_M4;
941 break;
942 case 3:
943 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
944 : RISCV::PseudoSF_VC_I_SE_M8;
945 break;
946 }
947
948 ReplaceNode(Node, CurDAG->getMachineNode(
949 Opcode, DL, Node->getSimpleValueType(0), Operands));
950}
951
952static unsigned getSegInstNF(unsigned Intrinsic) {
953#define INST_NF_CASE(NAME, NF) \
954 case Intrinsic::riscv_##NAME##NF: \
955 return NF;
956#define INST_NF_CASE_MASK(NAME, NF) \
957 case Intrinsic::riscv_##NAME##NF##_mask: \
958 return NF;
959#define INST_NF_CASE_FF(NAME, NF) \
960 case Intrinsic::riscv_##NAME##NF##ff: \
961 return NF;
962#define INST_NF_CASE_FF_MASK(NAME, NF) \
963 case Intrinsic::riscv_##NAME##NF##ff_mask: \
964 return NF;
965#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
966 MACRO_NAME(NAME, 2) \
967 MACRO_NAME(NAME, 3) \
968 MACRO_NAME(NAME, 4) \
969 MACRO_NAME(NAME, 5) \
970 MACRO_NAME(NAME, 6) \
971 MACRO_NAME(NAME, 7) \
972 MACRO_NAME(NAME, 8)
973#define INST_ALL_NF_CASE(NAME) \
974 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
975 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
976#define INST_ALL_NF_CASE_WITH_FF(NAME) \
977 INST_ALL_NF_CASE(NAME) \
978 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
979 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
980 switch (Intrinsic) {
981 default:
982 llvm_unreachable("Unexpected segment load/store intrinsic");
983 INST_ALL_NF_CASE_WITH_FF(vlseg)
984 INST_ALL_NF_CASE(vlsseg)
985 INST_ALL_NF_CASE(vloxseg)
986 INST_ALL_NF_CASE(vluxseg)
987 INST_ALL_NF_CASE(vsseg)
988 INST_ALL_NF_CASE(vssseg)
989 INST_ALL_NF_CASE(vsoxseg)
990 INST_ALL_NF_CASE(vsuxseg)
991 }
992}
993
994static bool isApplicableToPLI(int Val) {
995 // Check if the immediate is packed i8 or i10
996 int16_t Bit31To16 = Val >> 16;
997 int16_t Bit15To0 = Val;
998 int8_t Bit15To8 = Bit15To0 >> 8;
999 int8_t Bit7To0 = Val;
1000 if (Bit31To16 != Bit15To0)
1001 return false;
1002
1003 return isInt<10>(Bit31To16) || Bit15To8 == Bit7To0;
1004}
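// Examples (illustrative): 0x01010101 repeats the halfword 0x0101, which fits
// in 10 bits, so it qualifies (PLI_H); 0x7f7f7f7f repeats 0x7f7f, which does
// not fit in 10 bits, but its two bytes are equal, so it still qualifies
// (PLI_B); 0x12341234 repeats 0x1234, which neither fits in 10 bits nor has
// equal bytes, so it is rejected.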
1005
1006void RISCVDAGToDAGISel::Select(SDNode *Node) {
1007 // If we have a custom node, we have already selected.
1008 if (Node->isMachineOpcode()) {
1009 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1010 Node->setNodeId(-1);
1011 return;
1012 }
1013
1014 // Instruction Selection not handled by the auto-generated tablegen selection
1015 // should be handled here.
1016 unsigned Opcode = Node->getOpcode();
1017 MVT XLenVT = Subtarget->getXLenVT();
1018 SDLoc DL(Node);
1019 MVT VT = Node->getSimpleValueType(0);
1020
1021 bool HasBitTest = Subtarget->hasBEXTILike();
1022
1023 switch (Opcode) {
1024 case ISD::Constant: {
1025 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
1026 auto *ConstNode = cast<ConstantSDNode>(Node);
1027 if (ConstNode->isZero()) {
1028 SDValue New =
1029 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1030 ReplaceNode(Node, New.getNode());
1031 return;
1032 }
1033 int64_t Imm = ConstNode->getSExtValue();
1034 // If only the lower 8 bits are used, try to convert this to a simm6 by
1035 // sign-extending bit 7. This is neutral without the C extension, and
1036 // allows C.LI to be used if C is present.
1037 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
1038 Imm = SignExtend64<8>(Imm);
1039 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1040 // by sign extending bit 15.
1041 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
1042 hasAllHUsers(Node))
1043 Imm = SignExtend64<16>(Imm);
1044 // If the upper 32-bits are not used try to convert this into a simm32 by
1045 // sign extending bit 32.
1046 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1047 Imm = SignExtend64<32>(Imm);
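// Example of the 8-bit case above (illustrative): Imm = 0xFA with only its
// low byte used becomes SignExtend64<8>(0xFA) = -6, a simm6, which can be
// materialized with c.li when the C extension is available.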
1048
1049 if (Subtarget->enablePExtCodeGen() && isApplicableToPLI(Imm) &&
1050 hasAllWUsers(Node)) {
1051 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1052 // can simply copy the lower 32 bits to the upper 32 bits so that it can
1053 // be rematerialized as PLI_B or PLI_H.
1054 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1055 }
1056
1057 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1058 return;
1059 }
1060 case ISD::ConstantFP: {
1061 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1062
1063 bool Is64Bit = Subtarget->is64Bit();
1064 bool HasZdinx = Subtarget->hasStdExtZdinx();
1065
1066 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1067 SDValue Imm;
1068 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1069 // create an integer immediate.
1070 if (APF.isPosZero() || NegZeroF64) {
1071 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1072 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1073 else
1074 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1075 } else {
1076 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1077 *Subtarget);
1078 }
1079
1080 unsigned Opc;
1081 switch (VT.SimpleTy) {
1082 default:
1083 llvm_unreachable("Unexpected size");
1084 case MVT::bf16:
1085 assert(Subtarget->hasStdExtZfbfmin());
1086 Opc = RISCV::FMV_H_X;
1087 break;
1088 case MVT::f16:
1089 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1090 break;
1091 case MVT::f32:
1092 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1093 break;
1094 case MVT::f64:
1095 // For RV32, we can't move from a GPR, we need to convert instead. This
1096 // should only happen for +0.0 and -0.0.
1097 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1098 if (HasZdinx)
1099 Opc = RISCV::COPY;
1100 else
1101 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1102 break;
1103 }
1104
1105 SDNode *Res;
1106 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1107 Res =
1108 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1109 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1110 Res =
1111 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1112 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1113 Res = CurDAG->getMachineNode(
1114 Opc, DL, VT, Imm,
1115 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1116 else
1117 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1118
1119 // For f64 -0.0, we need to insert a fneg.d idiom.
1120 if (NegZeroF64) {
1121 Opc = RISCV::FSGNJN_D;
1122 if (HasZdinx)
1123 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1124 Res =
1125 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1126 }
1127
1128 ReplaceNode(Node, Res);
1129 return;
1130 }
1131 case RISCVISD::BuildGPRPair:
1132 case RISCVISD::BuildPairF64: {
1133 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1134 break;
1135
1136 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1137 "BuildPairF64 only handled here on rv32i_zdinx");
1138
1139 SDValue Ops[] = {
1140 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1141 Node->getOperand(0),
1142 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1143 Node->getOperand(1),
1144 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1145
1146 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
1147 ReplaceNode(Node, N);
1148 return;
1149 }
1150 case RISCVISD::SplitGPRPair:
1151 case RISCVISD::SplitF64: {
1152 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1153 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1154 "SplitF64 only handled here on rv32i_zdinx");
1155
1156 if (!SDValue(Node, 0).use_empty()) {
1157 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1158 Node->getValueType(0),
1159 Node->getOperand(0));
1160 ReplaceUses(SDValue(Node, 0), Lo);
1161 }
1162
1163 if (!SDValue(Node, 1).use_empty()) {
1164 SDValue Hi = CurDAG->getTargetExtractSubreg(
1165 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1166 ReplaceUses(SDValue(Node, 1), Hi);
1167 }
1168
1169 CurDAG->RemoveDeadNode(Node);
1170 return;
1171 }
1172
1173 assert(Opcode != RISCVISD::SplitGPRPair &&
1174 "SplitGPRPair should already be handled");
1175
1176 if (!Subtarget->hasStdExtZfa())
1177 break;
1178 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1179 "Unexpected subtarget");
1180
1181 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1182 if (!SDValue(Node, 0).use_empty()) {
1183 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1184 Node->getOperand(0));
1185 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1186 }
1187 if (!SDValue(Node, 1).use_empty()) {
1188 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1189 Node->getOperand(0));
1190 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1191 }
1192
1193 CurDAG->RemoveDeadNode(Node);
1194 return;
1195 }
1196 case ISD::SHL: {
1197 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1198 if (!N1C)
1199 break;
1200 SDValue N0 = Node->getOperand(0);
1201 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1202 !isa<ConstantSDNode>(N0.getOperand(1)))
1203 break;
1204 unsigned ShAmt = N1C->getZExtValue();
1205 uint64_t Mask = N0.getConstantOperandVal(1);
1206
1207 if (isShiftedMask_64(Mask)) {
1208 unsigned XLen = Subtarget->getXLen();
1209 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1210 unsigned TrailingZeros = llvm::countr_zero(Mask);
1211 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1212 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1213 // where C2 has 32 leading zeros and C3 trailing zeros.
1214 SDNode *SRLIW = CurDAG->getMachineNode(
1215 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1216 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1217 SDNode *SLLI = CurDAG->getMachineNode(
1218 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1219 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1220 ReplaceNode(Node, SLLI);
1221 return;
1222 }
1223 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1224 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1225 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1226 // where C2 has C4 leading zeros and no trailing zeros.
1227 // This is profitable if the "and" was to be lowered to
1228 // (srli (slli X, C4), C4) and not (andi X, C2).
1229 // For "LeadingZeros == 32":
1230 // - with Zba it's just (slli.uw X, C)
1231 // - without Zba a tablegen pattern applies the very same
1232 // transform as we would have done here
1233 SDNode *SLLI = CurDAG->getMachineNode(
1234 RISCV::SLLI, DL, VT, N0.getOperand(0),
1235 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1236 SDNode *SRLI = CurDAG->getMachineNode(
1237 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1238 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1239 ReplaceNode(Node, SRLI);
1240 return;
1241 }
1242 }
1243 break;
1244 }
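// Worked example for the first transform above (illustrative, RV64):
// (shl (and X, 0xFFFF0000), 4) has a mask with 32 leading and 16 trailing
// zeros, so it becomes (slli (srliw X, 16), 20): SRLIW both drops the low 16
// bits and discards everything above bit 31, and the SLLI places the field
// at its shifted position.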
1245 case ISD::SRL: {
1246 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1247 if (!N1C)
1248 break;
1249 SDValue N0 = Node->getOperand(0);
1250 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1251 break;
1252 unsigned ShAmt = N1C->getZExtValue();
1253 uint64_t Mask = N0.getConstantOperandVal(1);
1254
1255 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1256 // 32 leading zeros and C3 trailing zeros.
1257 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1258 unsigned XLen = Subtarget->getXLen();
1259 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1260 unsigned TrailingZeros = llvm::countr_zero(Mask);
1261 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1262 SDNode *SRLIW = CurDAG->getMachineNode(
1263 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1264 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1265 SDNode *SLLI = CurDAG->getMachineNode(
1266 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1267 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1268 ReplaceNode(Node, SLLI);
1269 return;
1270 }
1271 }
1272
1273 // Optimize (srl (and X, C2), C) ->
1274 // (srli (slli X, XLen-C3), (XLen-C3) + C)
1275 // Where C2 is a mask with C3 trailing ones.
1276 // Taking into account that the C2 may have had lower bits unset by
1277 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1278 // This pattern occurs when type legalizing right shifts for types with
1279 // less than XLen bits.
1280 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1281 if (!isMask_64(Mask))
1282 break;
1283 unsigned TrailingOnes = llvm::countr_one(Mask);
1284 if (ShAmt >= TrailingOnes)
1285 break;
1286 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1287 if (TrailingOnes == 32) {
1288 SDNode *SRLI = CurDAG->getMachineNode(
1289 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1290 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1291 ReplaceNode(Node, SRLI);
1292 return;
1293 }
1294
1295 // Only do the remaining transforms if the AND has one use.
1296 if (!N0.hasOneUse())
1297 break;
1298
1299 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1300 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1301 SDNode *BEXTI = CurDAG->getMachineNode(
1302 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1303 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1304 ReplaceNode(Node, BEXTI);
1305 return;
1306 }
1307
1308 const unsigned Msb = TrailingOnes - 1;
1309 const unsigned Lsb = ShAmt;
1310 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1311 return;
1312
1313 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1314 SDNode *SLLI =
1315 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1316 CurDAG->getTargetConstant(LShAmt, DL, VT));
1317 SDNode *SRLI = CurDAG->getMachineNode(
1318 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1319 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1320 ReplaceNode(Node, SRLI);
1321 return;
1322 }
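// Worked example for the trailing-ones path above (illustrative, RV64,
// assuming the AND has one use and no bitfield-extract/BEXT instruction is
// available): (srl (and X, 0x7FF0), 4) re-adds the low bits cleared by
// SimplifyDemandedBits to get the 15-bit mask 0x7FFF and is selected as
// (srli (slli X, 49), 53), i.e. bits [14:4] of X zero-extended.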
1323 case ISD::SRA: {
1324 if (trySignedBitfieldExtract(Node))
1325 return;
1326
1327 if (trySignedBitfieldInsertInSign(Node))
1328 return;
1329
1330 // Optimize (sra (sext_inreg X, i16), C) ->
1331 // (srai (slli X, XLen-16), (XLen-16) + C)
1332 // And (sra (sext_inreg X, i8), C) ->
1333 // (srai (slli X, XLen-8), (XLen-8) + C)
1334 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1335 // This transform matches the code we get without Zbb. The shifts are more
1336 // compressible, and this can help expose CSE opportunities in the sdiv by
1337 // constant optimization.
1338 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1339 if (!N1C)
1340 break;
1341 SDValue N0 = Node->getOperand(0);
1342 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1343 break;
1344 unsigned ShAmt = N1C->getZExtValue();
1345 unsigned ExtSize =
1346 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1347 // ExtSize of 32 should use sraiw via tablegen pattern.
1348 if (ExtSize >= 32 || ShAmt >= ExtSize)
1349 break;
1350 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1351 SDNode *SLLI =
1352 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1353 CurDAG->getTargetConstant(LShAmt, DL, VT));
1354 SDNode *SRAI = CurDAG->getMachineNode(
1355 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1356 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1357 ReplaceNode(Node, SRAI);
1358 return;
1359 }
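// Worked example for the transform above (illustrative, RV64, no vendor
// bitfield-extract extension): (sra (sext_inreg X, i16), 3) becomes
// (srai (slli X, 48), 51); the slli/srai pair performs the 16-bit sign
// extension and the extra shift in two compressible instructions.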
1360 case ISD::OR: {
1362 return;
1363
1364 break;
1365 }
1366 case ISD::XOR:
1367 if (tryShrinkShlLogicImm(Node))
1368 return;
1369
1370 break;
1371 case ISD::AND: {
1372 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1373 if (!N1C)
1374 break;
1375
1376 SDValue N0 = Node->getOperand(0);
1377
1378 bool LeftShift = N0.getOpcode() == ISD::SHL;
1379 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1380 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1381 if (!C)
1382 break;
1383 unsigned C2 = C->getZExtValue();
1384 unsigned XLen = Subtarget->getXLen();
1385 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1386
1387 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1388 // shift pair might offer more compression opportunities.
1389 // TODO: We could check for C extension here, but we don't have many lit
1390 // tests with the C extension enabled so not checking gets better
1391 // coverage.
1392 // TODO: What if ANDI is faster than the shift?
1393 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1394
1395 uint64_t C1 = N1C->getZExtValue();
1396
1397 // Clear irrelevant bits in the mask.
1398 if (LeftShift)
1399 C1 &= maskTrailingZeros<uint64_t>(C2);
1400 else
1401 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1402
1403 // Some transforms should only be done if the shift has a single use or
1404 // the AND would become (srli (slli X, 32), 32)
1405 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1406
1407 SDValue X = N0.getOperand(0);
1408
1409 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1410 // with c3 leading zeros.
1411 if (!LeftShift && isMask_64(C1)) {
1412 unsigned Leading = XLen - llvm::bit_width(C1);
1413 if (C2 < Leading) {
1414 // If the number of leading zeros is C2+32 this can be SRLIW.
1415 if (C2 + 32 == Leading) {
1416 SDNode *SRLIW = CurDAG->getMachineNode(
1417 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1418 ReplaceNode(Node, SRLIW);
1419 return;
1420 }
1421
1422 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1423 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1424 //
1425 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1426 // legalized and goes through DAG combine.
1427 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1428 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1429 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1430 SDNode *SRAIW =
1431 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1432 CurDAG->getTargetConstant(31, DL, VT));
1433 SDNode *SRLIW = CurDAG->getMachineNode(
1434 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1435 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1436 ReplaceNode(Node, SRLIW);
1437 return;
1438 }
1439
1440 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1441 // available.
1442 // Transform (and (srl x, C2), C1)
1443 // -> (<bfextract> x, msb, lsb)
1444 //
1445 // Make sure to keep this below the SRLIW cases, as we always want to
1446 // prefer the more common instruction.
1447 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1448 const unsigned Lsb = C2;
1449 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1450 return;
1451
1452 // (srli (slli x, c3-c2), c3).
1453 // Skip if we could use (zext.w (sraiw X, C2)).
1454 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1455 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1456 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1457 // Also Skip if we can use bexti or th.tst.
1458 Skip |= HasBitTest && Leading == XLen - 1;
1459 if (OneUseOrZExtW && !Skip) {
1460 SDNode *SLLI = CurDAG->getMachineNode(
1461 RISCV::SLLI, DL, VT, X,
1462 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1463 SDNode *SRLI = CurDAG->getMachineNode(
1464 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1465 CurDAG->getTargetConstant(Leading, DL, VT));
1466 ReplaceNode(Node, SRLI);
1467 return;
1468 }
1469 }
1470 }
1471
1472 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1473 // shifted by c2 bits with c3 leading zeros.
1474 if (LeftShift && isShiftedMask_64(C1)) {
1475 unsigned Leading = XLen - llvm::bit_width(C1);
1476
1477 if (C2 + Leading < XLen &&
1478 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1479 // Use slli.uw when possible.
1480 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1481 SDNode *SLLI_UW =
1482 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1483 CurDAG->getTargetConstant(C2, DL, VT));
1484 ReplaceNode(Node, SLLI_UW);
1485 return;
1486 }
1487
1488 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1489 // available.
1490 // Transform (and (shl x, c2), c1)
1491 // -> (<bfinsert> x, msb, lsb)
1492 // e.g.
1493 // (and (shl x, 12), 0x00fff000)
1494 // If XLen = 32 and C2 = 12, then
1495 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1496 const unsigned Msb = XLen - Leading - 1;
1497 const unsigned Lsb = C2;
1498 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1499 return;
1500
1501 // (srli (slli c2+c3), c3)
1502 if (OneUseOrZExtW && !IsCANDI) {
1503 SDNode *SLLI = CurDAG->getMachineNode(
1504 RISCV::SLLI, DL, VT, X,
1505 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1506 SDNode *SRLI = CurDAG->getMachineNode(
1507 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1508 CurDAG->getTargetConstant(Leading, DL, VT));
1509 ReplaceNode(Node, SRLI);
1510 return;
1511 }
1512 }
1513 }
1514
1515 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1516 // shifted mask with c2 leading zeros and c3 trailing zeros.
1517 if (!LeftShift && isShiftedMask_64(C1)) {
1518 unsigned Leading = XLen - llvm::bit_width(C1);
1519 unsigned Trailing = llvm::countr_zero(C1);
1520 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1521 !IsCANDI) {
1522 unsigned SrliOpc = RISCV::SRLI;
1523 // If the input is zexti32 we should use SRLIW.
1524 if (X.getOpcode() == ISD::AND &&
1525 isa<ConstantSDNode>(X.getOperand(1)) &&
1526 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1527 SrliOpc = RISCV::SRLIW;
1528 X = X.getOperand(0);
1529 }
1530 SDNode *SRLI = CurDAG->getMachineNode(
1531 SrliOpc, DL, VT, X,
1532 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1533 SDNode *SLLI = CurDAG->getMachineNode(
1534 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1535 CurDAG->getTargetConstant(Trailing, DL, VT));
1536 ReplaceNode(Node, SLLI);
1537 return;
1538 }
1539 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1540 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1541 OneUseOrZExtW && !IsCANDI) {
1542 SDNode *SRLIW = CurDAG->getMachineNode(
1543 RISCV::SRLIW, DL, VT, X,
1544 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1545 SDNode *SLLI = CurDAG->getMachineNode(
1546 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1547 CurDAG->getTargetConstant(Trailing, DL, VT));
1548 ReplaceNode(Node, SLLI);
1549 return;
1550 }
1551 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1552 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1553 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1554 SDNode *SRLI = CurDAG->getMachineNode(
1555 RISCV::SRLI, DL, VT, X,
1556 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1557 SDNode *SLLI_UW = CurDAG->getMachineNode(
1558 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1559 CurDAG->getTargetConstant(Trailing, DL, VT));
1560 ReplaceNode(Node, SLLI_UW);
1561 return;
1562 }
1563 }
1564
1565 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1566 // shifted mask with no leading zeros and c3 trailing zeros.
1567 if (LeftShift && isShiftedMask_64(C1)) {
1568 unsigned Leading = XLen - llvm::bit_width(C1);
1569 unsigned Trailing = llvm::countr_zero(C1);
1570 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1571 SDNode *SRLI = CurDAG->getMachineNode(
1572 RISCV::SRLI, DL, VT, X,
1573 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1574 SDNode *SLLI = CurDAG->getMachineNode(
1575 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1576 CurDAG->getTargetConstant(Trailing, DL, VT));
1577 ReplaceNode(Node, SLLI);
1578 return;
1579 }
1580 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1581 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1582 SDNode *SRLIW = CurDAG->getMachineNode(
1583 RISCV::SRLIW, DL, VT, X,
1584 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1585 SDNode *SLLI = CurDAG->getMachineNode(
1586 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1587 CurDAG->getTargetConstant(Trailing, DL, VT));
1588 ReplaceNode(Node, SLLI);
1589 return;
1590 }
1591
1592 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1593 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1594 Subtarget->hasStdExtZba()) {
1595 SDNode *SRLI = CurDAG->getMachineNode(
1596 RISCV::SRLI, DL, VT, X,
1597 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1598 SDNode *SLLI_UW = CurDAG->getMachineNode(
1599 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1600 CurDAG->getTargetConstant(Trailing, DL, VT));
1601 ReplaceNode(Node, SLLI_UW);
1602 return;
1603 }
1604 }
1605 }
1606
1607 const uint64_t C1 = N1C->getZExtValue();
1608
1609 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1610 N0.hasOneUse()) {
1611 unsigned C2 = N0.getConstantOperandVal(1);
1612 unsigned XLen = Subtarget->getXLen();
1613 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1614
1615 SDValue X = N0.getOperand(0);
1616
1617 // Prefer SRAIW + ANDI when possible.
1618 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1619 X.getOpcode() == ISD::SHL &&
1620 isa<ConstantSDNode>(X.getOperand(1)) &&
1621 X.getConstantOperandVal(1) == 32;
1622 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1623 // mask with c3 leading zeros and c2 is larger than c3.
1624 if (isMask_64(C1) && !Skip) {
1625 unsigned Leading = XLen - llvm::bit_width(C1);
1626 if (C2 > Leading) {
1627 SDNode *SRAI = CurDAG->getMachineNode(
1628 RISCV::SRAI, DL, VT, X,
1629 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1630 SDNode *SRLI = CurDAG->getMachineNode(
1631 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1632 CurDAG->getTargetConstant(Leading, DL, VT));
1633 ReplaceNode(Node, SRLI);
1634 return;
1635 }
1636 }
1637
1638 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1639 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1640 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1641 if (isShiftedMask_64(C1) && !Skip) {
1642 unsigned Leading = XLen - llvm::bit_width(C1);
1643 unsigned Trailing = llvm::countr_zero(C1);
1644 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1645 SDNode *SRAI = CurDAG->getMachineNode(
1646 RISCV::SRAI, DL, VT, N0.getOperand(0),
1647 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1648 SDNode *SRLI = CurDAG->getMachineNode(
1649 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1650 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1651 SDNode *SLLI = CurDAG->getMachineNode(
1652 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1653 CurDAG->getTargetConstant(Trailing, DL, VT));
1654 ReplaceNode(Node, SLLI);
1655 return;
1656 }
1657 }
1658 }
1659
1660 // If C1 masks off the upper bits only (but can't be formed as an
1661 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1662 // available.
1663 // Transform (and x, C1)
1664 // -> (<bfextract> x, msb, lsb)
1665 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1666 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1667 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1668 const unsigned Msb = llvm::bit_width(C1) - 1;
1669 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1670 return;
1671 }
1672
1673 if (tryShrinkShlLogicImm(Node))
1674 return;
1675
1676 break;
1677 }
1678 case ISD::MUL: {
1679 // Special case for calculating (mul (and X, C2), C1) where the full product
1680 // fits in XLen bits. We can shift X left by the number of leading zeros in
1681 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1682 // product has XLen trailing zeros, putting it in the output of MULHU. This
1683 // can avoid materializing a constant in a register for C2.
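// For example, with XLen=64, C2=0xffffffff and C1=5:
// (mul (and X, 0xffffffff), 5) -> (mulhu (slli X, 32), 5<<32), since the
// 128-bit product is exactly ((X & 0xffffffff) * 5) << 64.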
1684
1685 // RHS should be a constant.
1686 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1687 if (!N1C || !N1C->hasOneUse())
1688 break;
1689
1690 // LHS should be an AND with constant.
1691 SDValue N0 = Node->getOperand(0);
1692 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1693 break;
1694
1695 uint64_t C2 = N0.getConstantOperandVal(1);
1696 
1697 // Constant should be a mask.
1698 if (!isMask_64(C2))
1699 break;
1700
1701 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1702 // multiple users or the constant is a simm12. This prevents inserting a
1703 // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1704 // make it more costly to materialize. Otherwise, using a SLLI might allow
1705 // it to be compressed.
1706 bool IsANDIOrZExt =
1707 isInt<12>(C2) ||
1708 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1709 // With XTHeadBb, we can use TH.EXTU.
1710 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1711 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1712 break;
1713 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1714 // the constant is a simm32.
1715 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1716 // With XTHeadBb, we can use TH.EXTU.
1717 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1718 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1719 break;
1720
1721 // We need to shift left the AND input and C1 by a total of XLen bits.
1722
1723 // How far left do we need to shift the AND input?
1724 unsigned XLen = Subtarget->getXLen();
1725 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1726
1727 // The constant gets shifted by the remaining amount unless that would
1728 // shift bits out.
1729 uint64_t C1 = N1C->getZExtValue();
1730 unsigned ConstantShift = XLen - LeadingZeros;
1731 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1732 break;
1733
1734 uint64_t ShiftedC1 = C1 << ConstantShift;
1735 // If this is RV32, we need to sign extend the constant.
1736 if (XLen == 32)
1737 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1738
1739 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1740 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1741 SDNode *SLLI =
1742 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1743 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1744 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1745 SDValue(SLLI, 0), SDValue(Imm, 0));
1746 ReplaceNode(Node, MULHU);
1747 return;
1748 }
1749 case ISD::LOAD: {
1750 if (tryIndexedLoad(Node))
1751 return;
1752
1753 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1754 // We match post-incrementing loads here.
1755 LoadSDNode *Load = cast<LoadSDNode>(Node);
1756 if (Load->getAddressingMode() != ISD::POST_INC)
1757 break;
1758
1759 SDValue Chain = Node->getOperand(0);
1760 SDValue Base = Node->getOperand(1);
1761 SDValue Offset = Node->getOperand(2);
1762
1763 bool Simm12 = false;
1764 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1765
1766 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1767 int ConstantVal = ConstantOffset->getSExtValue();
1768 Simm12 = isInt<12>(ConstantVal);
1769 if (Simm12)
1770 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1771 Offset.getValueType());
1772 }
1773
1774 unsigned Opcode = 0;
1775 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1776 case MVT::i8:
1777 if (Simm12 && SignExtend)
1778 Opcode = RISCV::CV_LB_ri_inc;
1779 else if (Simm12 && !SignExtend)
1780 Opcode = RISCV::CV_LBU_ri_inc;
1781 else if (!Simm12 && SignExtend)
1782 Opcode = RISCV::CV_LB_rr_inc;
1783 else
1784 Opcode = RISCV::CV_LBU_rr_inc;
1785 break;
1786 case MVT::i16:
1787 if (Simm12 && SignExtend)
1788 Opcode = RISCV::CV_LH_ri_inc;
1789 else if (Simm12 && !SignExtend)
1790 Opcode = RISCV::CV_LHU_ri_inc;
1791 else if (!Simm12 && SignExtend)
1792 Opcode = RISCV::CV_LH_rr_inc;
1793 else
1794 Opcode = RISCV::CV_LHU_rr_inc;
1795 break;
1796 case MVT::i32:
1797 if (Simm12)
1798 Opcode = RISCV::CV_LW_ri_inc;
1799 else
1800 Opcode = RISCV::CV_LW_rr_inc;
1801 break;
1802 default:
1803 break;
1804 }
1805 if (!Opcode)
1806 break;
1807
1808 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1809 Chain.getSimpleValueType(), Base,
1810 Offset, Chain));
1811 return;
1812 }
1813 break;
1814 }
1815 case RISCVISD::LD_RV32: {
1816 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
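// Select a single register-pair load and split the untyped result into the
// even/odd GPR halves that replace the node's two i32 results.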
1817
1818 SDValue Base, Offset;
1819 SDValue Chain = Node->getOperand(0);
1820 SDValue Addr = Node->getOperand(1);
1821 SelectAddrRegImm(Addr, Base, Offset);
1822 
1823 SDValue Ops[] = {Base, Offset, Chain};
1824 MachineSDNode *New = CurDAG->getMachineNode(
1825 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1826 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1827 MVT::i32, SDValue(New, 0));
1828 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1829 MVT::i32, SDValue(New, 0));
1830 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1831 ReplaceUses(SDValue(Node, 0), Lo);
1832 ReplaceUses(SDValue(Node, 1), Hi);
1833 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1834 CurDAG->RemoveDeadNode(Node);
1835 return;
1836 }
1837 case RISCVISD::SD_RV32: {
1838 SDValue Base, Offset;
1839 SDValue Chain = Node->getOperand(0);
1840 SDValue Addr = Node->getOperand(3);
1841 SelectAddrRegImm(Addr, Base, Offset);
1842 
1843 SDValue Lo = Node->getOperand(1);
1844 SDValue Hi = Node->getOperand(2);
1845
1846 SDValue RegPair;
1847 // Peephole to use X0_Pair for storing zero.
1848 if (isNullConstant(Lo) && isNullConstant(Hi)) {
1849 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1850 } else {
1851 SDValue Ops[] = {
1852 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1853 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1854 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1855
1856 RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1857 MVT::Untyped, Ops),
1858 0);
1859 }
1860
1861 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1862 {RegPair, Base, Offset, Chain});
1863 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1864 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
1865 CurDAG->RemoveDeadNode(Node);
1866 return;
1867 }
1868 case ISD::INTRINSIC_WO_CHAIN: {
1869 unsigned IntNo = Node->getConstantOperandVal(0);
1870 switch (IntNo) {
1871 // By default we do not custom select any intrinsic.
1872 default:
1873 break;
1874 case Intrinsic::riscv_vmsgeu:
1875 case Intrinsic::riscv_vmsge: {
1876 SDValue Src1 = Node->getOperand(1);
1877 SDValue Src2 = Node->getOperand(2);
1878 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1879 bool IsCmpConstant = false;
1880 bool IsCmpMinimum = false;
1881 // Only custom select scalar second operand.
1882 if (Src2.getValueType() != XLenVT)
1883 break;
1884 // Small constants are handled with patterns.
1885 int64_t CVal = 0;
1886 MVT Src1VT = Src1.getSimpleValueType();
1887 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1888 IsCmpConstant = true;
1889 CVal = C->getSExtValue();
1890 if (CVal >= -15 && CVal <= 16) {
1891 if (!IsUnsigned || CVal != 0)
1892 break;
1893 IsCmpMinimum = true;
1894 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1895 Src1VT.getScalarSizeInBits())
1896 .getSExtValue()) {
1897 IsCmpMinimum = true;
1898 }
1899 }
1900 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1901 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1902 default:
1903 llvm_unreachable("Unexpected LMUL!");
1904#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1905 case RISCVVType::lmulenum: \
1906 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1907 : RISCV::PseudoVMSLT_VX_##suffix; \
1908 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1909 : RISCV::PseudoVMSGT_VX_##suffix; \
1910 break;
1911 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1912 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1913 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1914 CASE_VMSLT_OPCODES(LMUL_1, M1)
1915 CASE_VMSLT_OPCODES(LMUL_2, M2)
1916 CASE_VMSLT_OPCODES(LMUL_4, M4)
1917 CASE_VMSLT_OPCODES(LMUL_8, M8)
1918#undef CASE_VMSLT_OPCODES
1919 }
1920 // Mask operations use the LMUL from the mask type.
1921 switch (RISCVTargetLowering::getLMUL(VT)) {
1922 default:
1923 llvm_unreachable("Unexpected LMUL!");
1924#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1925 case RISCVVType::lmulenum: \
1926 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1927 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1928 break;
1929 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1930 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1931 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1932 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1933 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1934 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1935 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1936#undef CASE_VMNAND_VMSET_OPCODES
1937 }
1938 SDValue SEW = CurDAG->getTargetConstant(
1939 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1940 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1941 SDValue VL;
1942 selectVLOp(Node->getOperand(3), VL);
1943
1944 // If vmsge(u) with minimum value, expand it to vmset.
1945 if (IsCmpMinimum) {
1946 ReplaceNode(Node,
1947 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1948 return;
1949 }
1950
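// For other constants, x >= C is equivalent to x > (C - 1), so select
// vmsgt(u).vx with the materialized constant C - 1.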
1951 if (IsCmpConstant) {
1952 SDValue Imm =
1953 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1954
1955 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1956 {Src1, Imm, VL, SEW}));
1957 return;
1958 }
1959
1960 // Expand to
1961 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
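// vmnand.mm vd, vd, vd computes NOT vd, giving !(va < x) == (va >= x).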
1962 SDValue Cmp = SDValue(
1963 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1964 0);
1965 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1966 {Cmp, Cmp, VL, MaskSEW}));
1967 return;
1968 }
1969 case Intrinsic::riscv_vmsgeu_mask:
1970 case Intrinsic::riscv_vmsge_mask: {
1971 SDValue Src1 = Node->getOperand(2);
1972 SDValue Src2 = Node->getOperand(3);
1973 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1974 bool IsCmpConstant = false;
1975 bool IsCmpMinimum = false;
1976 // Only custom select scalar second operand.
1977 if (Src2.getValueType() != XLenVT)
1978 break;
1979 // Small constants are handled with patterns.
1980 MVT Src1VT = Src1.getSimpleValueType();
1981 int64_t CVal = 0;
1982 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1983 IsCmpConstant = true;
1984 CVal = C->getSExtValue();
1985 if (CVal >= -15 && CVal <= 16) {
1986 if (!IsUnsigned || CVal != 0)
1987 break;
1988 IsCmpMinimum = true;
1989 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1990 Src1VT.getScalarSizeInBits())
1991 .getSExtValue()) {
1992 IsCmpMinimum = true;
1993 }
1994 }
1995 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1996 VMOROpcode, VMSGTMaskOpcode;
1997 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1998 default:
1999 llvm_unreachable("Unexpected LMUL!");
2000#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2001 case RISCVVType::lmulenum: \
2002 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2003 : RISCV::PseudoVMSLT_VX_##suffix; \
2004 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2005 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2006 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2007 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2008 break;
2009 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2010 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2011 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2012 CASE_VMSLT_OPCODES(LMUL_1, M1)
2013 CASE_VMSLT_OPCODES(LMUL_2, M2)
2014 CASE_VMSLT_OPCODES(LMUL_4, M4)
2015 CASE_VMSLT_OPCODES(LMUL_8, M8)
2016#undef CASE_VMSLT_OPCODES
2017 }
2018 // Mask operations use the LMUL from the mask type.
2019 switch (RISCVTargetLowering::getLMUL(VT)) {
2020 default:
2021 llvm_unreachable("Unexpected LMUL!");
2022#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2023 case RISCVVType::lmulenum: \
2024 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2025 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2026 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2027 break;
2028 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2029 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2030 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2031 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2032 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2033 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2034 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2035#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2036 }
2037 SDValue SEW = CurDAG->getTargetConstant(
2038 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2039 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2040 SDValue VL;
2041 selectVLOp(Node->getOperand(5), VL);
2042 SDValue MaskedOff = Node->getOperand(1);
2043 SDValue Mask = Node->getOperand(4);
2044
2045 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2046 if (IsCmpMinimum) {
2047 // We don't need vmor if the MaskedOff and the Mask are the same
2048 // value.
2049 if (Mask == MaskedOff) {
2050 ReplaceUses(Node, Mask.getNode());
2051 return;
2052 }
2053 ReplaceNode(Node,
2054 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2055 {Mask, MaskedOff, VL, MaskSEW}));
2056 return;
2057 }
2058
2059 // If the MaskedOff value and the Mask are the same value use
2060 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2061 // This avoids needing to copy v0 to vd before starting the next sequence.
2062 if (Mask == MaskedOff) {
2063 SDValue Cmp = SDValue(
2064 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2065 0);
2066 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2067 {Mask, Cmp, VL, MaskSEW}));
2068 return;
2069 }
2070
2071 SDValue PolicyOp =
2072 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2073
2074 if (IsCmpConstant) {
2075 SDValue Imm =
2076 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2077
2078 ReplaceNode(Node, CurDAG->getMachineNode(
2079 VMSGTMaskOpcode, DL, VT,
2080 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2081 return;
2082 }
2083
2084 // Otherwise use
2085 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2086 // The result is mask undisturbed.
2087 // We use the same instructions to emulate mask agnostic behavior, because
2088 // the agnostic result can be either undisturbed or all 1.
2089 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2090 {MaskedOff, Src1, Src2, Mask,
2091 VL, SEW, PolicyOp}),
2092 0);
2093 // vmxor.mm vd, vd, v0 is used to update active value.
2094 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2095 {Cmp, Mask, VL, MaskSEW}));
2096 return;
2097 }
2098 case Intrinsic::riscv_vsetvli:
2099 case Intrinsic::riscv_vsetvlimax:
2100 return selectVSETVLI(Node);
2101 case Intrinsic::riscv_sf_vsettnt:
2102 case Intrinsic::riscv_sf_vsettm:
2103 case Intrinsic::riscv_sf_vsettk:
2104 return selectXSfmmVSET(Node);
2105 }
2106 break;
2107 }
2108 case ISD::INTRINSIC_W_CHAIN: {
2109 unsigned IntNo = Node->getConstantOperandVal(1);
2110 switch (IntNo) {
2111 // By default we do not custom select any intrinsic.
2112 default:
2113 break;
2114 case Intrinsic::riscv_vlseg2:
2115 case Intrinsic::riscv_vlseg3:
2116 case Intrinsic::riscv_vlseg4:
2117 case Intrinsic::riscv_vlseg5:
2118 case Intrinsic::riscv_vlseg6:
2119 case Intrinsic::riscv_vlseg7:
2120 case Intrinsic::riscv_vlseg8: {
2121 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2122 /*IsStrided*/ false);
2123 return;
2124 }
2125 case Intrinsic::riscv_vlseg2_mask:
2126 case Intrinsic::riscv_vlseg3_mask:
2127 case Intrinsic::riscv_vlseg4_mask:
2128 case Intrinsic::riscv_vlseg5_mask:
2129 case Intrinsic::riscv_vlseg6_mask:
2130 case Intrinsic::riscv_vlseg7_mask:
2131 case Intrinsic::riscv_vlseg8_mask: {
2132 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2133 /*IsStrided*/ false);
2134 return;
2135 }
2136 case Intrinsic::riscv_vlsseg2:
2137 case Intrinsic::riscv_vlsseg3:
2138 case Intrinsic::riscv_vlsseg4:
2139 case Intrinsic::riscv_vlsseg5:
2140 case Intrinsic::riscv_vlsseg6:
2141 case Intrinsic::riscv_vlsseg7:
2142 case Intrinsic::riscv_vlsseg8: {
2143 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2144 /*IsStrided*/ true);
2145 return;
2146 }
2147 case Intrinsic::riscv_vlsseg2_mask:
2148 case Intrinsic::riscv_vlsseg3_mask:
2149 case Intrinsic::riscv_vlsseg4_mask:
2150 case Intrinsic::riscv_vlsseg5_mask:
2151 case Intrinsic::riscv_vlsseg6_mask:
2152 case Intrinsic::riscv_vlsseg7_mask:
2153 case Intrinsic::riscv_vlsseg8_mask: {
2154 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2155 /*IsStrided*/ true);
2156 return;
2157 }
2158 case Intrinsic::riscv_vloxseg2:
2159 case Intrinsic::riscv_vloxseg3:
2160 case Intrinsic::riscv_vloxseg4:
2161 case Intrinsic::riscv_vloxseg5:
2162 case Intrinsic::riscv_vloxseg6:
2163 case Intrinsic::riscv_vloxseg7:
2164 case Intrinsic::riscv_vloxseg8:
2165 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2166 /*IsOrdered*/ true);
2167 return;
2168 case Intrinsic::riscv_vluxseg2:
2169 case Intrinsic::riscv_vluxseg3:
2170 case Intrinsic::riscv_vluxseg4:
2171 case Intrinsic::riscv_vluxseg5:
2172 case Intrinsic::riscv_vluxseg6:
2173 case Intrinsic::riscv_vluxseg7:
2174 case Intrinsic::riscv_vluxseg8:
2175 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2176 /*IsOrdered*/ false);
2177 return;
2178 case Intrinsic::riscv_vloxseg2_mask:
2179 case Intrinsic::riscv_vloxseg3_mask:
2180 case Intrinsic::riscv_vloxseg4_mask:
2181 case Intrinsic::riscv_vloxseg5_mask:
2182 case Intrinsic::riscv_vloxseg6_mask:
2183 case Intrinsic::riscv_vloxseg7_mask:
2184 case Intrinsic::riscv_vloxseg8_mask:
2185 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2186 /*IsOrdered*/ true);
2187 return;
2188 case Intrinsic::riscv_vluxseg2_mask:
2189 case Intrinsic::riscv_vluxseg3_mask:
2190 case Intrinsic::riscv_vluxseg4_mask:
2191 case Intrinsic::riscv_vluxseg5_mask:
2192 case Intrinsic::riscv_vluxseg6_mask:
2193 case Intrinsic::riscv_vluxseg7_mask:
2194 case Intrinsic::riscv_vluxseg8_mask:
2195 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2196 /*IsOrdered*/ false);
2197 return;
2198 case Intrinsic::riscv_vlseg8ff:
2199 case Intrinsic::riscv_vlseg7ff:
2200 case Intrinsic::riscv_vlseg6ff:
2201 case Intrinsic::riscv_vlseg5ff:
2202 case Intrinsic::riscv_vlseg4ff:
2203 case Intrinsic::riscv_vlseg3ff:
2204 case Intrinsic::riscv_vlseg2ff: {
2205 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2206 return;
2207 }
2208 case Intrinsic::riscv_vlseg8ff_mask:
2209 case Intrinsic::riscv_vlseg7ff_mask:
2210 case Intrinsic::riscv_vlseg6ff_mask:
2211 case Intrinsic::riscv_vlseg5ff_mask:
2212 case Intrinsic::riscv_vlseg4ff_mask:
2213 case Intrinsic::riscv_vlseg3ff_mask:
2214 case Intrinsic::riscv_vlseg2ff_mask: {
2215 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2216 return;
2217 }
2218 case Intrinsic::riscv_vloxei:
2219 case Intrinsic::riscv_vloxei_mask:
2220 case Intrinsic::riscv_vluxei:
2221 case Intrinsic::riscv_vluxei_mask: {
2222 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2223 IntNo == Intrinsic::riscv_vluxei_mask;
2224 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2225 IntNo == Intrinsic::riscv_vloxei_mask;
2226
2227 MVT VT = Node->getSimpleValueType(0);
2228 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2229
2230 unsigned CurOp = 2;
2231 SmallVector<SDValue, 8> Operands;
2232 Operands.push_back(Node->getOperand(CurOp++));
2233
2234 MVT IndexVT;
2235 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2236 /*IsStridedOrIndexed*/ true, Operands,
2237 /*IsLoad=*/true, &IndexVT);
2238
2239 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2240 "Element count mismatch");
2241
2242 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2243 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2244 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2245 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2246 reportFatalUsageError("The V extension does not support EEW=64 for "
2247 "index values when XLEN=32");
2248 }
2249 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2250 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2251 static_cast<unsigned>(IndexLMUL));
2252 MachineSDNode *Load =
2253 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2254
2255 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2256
2257 ReplaceNode(Node, Load);
2258 return;
2259 }
2260 case Intrinsic::riscv_vlm:
2261 case Intrinsic::riscv_vle:
2262 case Intrinsic::riscv_vle_mask:
2263 case Intrinsic::riscv_vlse:
2264 case Intrinsic::riscv_vlse_mask: {
2265 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2266 IntNo == Intrinsic::riscv_vlse_mask;
2267 bool IsStrided =
2268 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2269
2270 MVT VT = Node->getSimpleValueType(0);
2271 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2272
2273 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2274 // operand at the IR level. The pseudos have both a policy and a
2275 // passthru operand. The passthru operand is needed to track the
2276 // "tail undefined" state, and the policy is there just for
2277 // consistency - it will always be "don't care" for the
2278 // unmasked form.
2279 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2280 unsigned CurOp = 2;
2281 SmallVector<SDValue, 8> Operands;
2282 if (HasPassthruOperand)
2283 Operands.push_back(Node->getOperand(CurOp++));
2284 else {
2285 // We eagerly lower to implicit_def (instead of undef), as we
2286 // otherwise fail to select nodes such as: nxv1i1 = undef
2287 SDNode *Passthru =
2288 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2289 Operands.push_back(SDValue(Passthru, 0));
2290 }
2291 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2292 Operands, /*IsLoad=*/true);
2293
2294 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2295 const RISCV::VLEPseudo *P =
2296 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2297 static_cast<unsigned>(LMUL));
2298 MachineSDNode *Load =
2299 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2300
2301 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2302
2303 ReplaceNode(Node, Load);
2304 return;
2305 }
2306 case Intrinsic::riscv_vleff:
2307 case Intrinsic::riscv_vleff_mask: {
2308 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2309
2310 MVT VT = Node->getSimpleValueType(0);
2311 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2312
2313 unsigned CurOp = 2;
2314 SmallVector<SDValue, 7> Operands;
2315 Operands.push_back(Node->getOperand(CurOp++));
2316 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2317 /*IsStridedOrIndexed*/ false, Operands,
2318 /*IsLoad=*/true);
2319
2320 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2321 const RISCV::VLEPseudo *P =
2322 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2323 Log2SEW, static_cast<unsigned>(LMUL));
2324 MachineSDNode *Load = CurDAG->getMachineNode(
2325 P->Pseudo, DL, Node->getVTList(), Operands);
2326 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2327
2328 ReplaceNode(Node, Load);
2329 return;
2330 }
2331 case Intrinsic::riscv_nds_vln:
2332 case Intrinsic::riscv_nds_vln_mask:
2333 case Intrinsic::riscv_nds_vlnu:
2334 case Intrinsic::riscv_nds_vlnu_mask: {
2335 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2336 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2337 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2338 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2339
2340 MVT VT = Node->getSimpleValueType(0);
2341 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2342 unsigned CurOp = 2;
2343 SmallVector<SDValue, 8> Operands;
2344
2345 Operands.push_back(Node->getOperand(CurOp++));
2346 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2347 /*IsStridedOrIndexed=*/false, Operands,
2348 /*IsLoad=*/true);
2349
2350 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2351 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2352 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2353 MachineSDNode *Load =
2354 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2355
2356 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2357 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2358
2359 ReplaceNode(Node, Load);
2360 return;
2361 }
2362 }
2363 break;
2364 }
2365 case ISD::INTRINSIC_VOID: {
2366 unsigned IntNo = Node->getConstantOperandVal(1);
2367 switch (IntNo) {
2368 case Intrinsic::riscv_vsseg2:
2369 case Intrinsic::riscv_vsseg3:
2370 case Intrinsic::riscv_vsseg4:
2371 case Intrinsic::riscv_vsseg5:
2372 case Intrinsic::riscv_vsseg6:
2373 case Intrinsic::riscv_vsseg7:
2374 case Intrinsic::riscv_vsseg8: {
2375 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2376 /*IsStrided*/ false);
2377 return;
2378 }
2379 case Intrinsic::riscv_vsseg2_mask:
2380 case Intrinsic::riscv_vsseg3_mask:
2381 case Intrinsic::riscv_vsseg4_mask:
2382 case Intrinsic::riscv_vsseg5_mask:
2383 case Intrinsic::riscv_vsseg6_mask:
2384 case Intrinsic::riscv_vsseg7_mask:
2385 case Intrinsic::riscv_vsseg8_mask: {
2386 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2387 /*IsStrided*/ false);
2388 return;
2389 }
2390 case Intrinsic::riscv_vssseg2:
2391 case Intrinsic::riscv_vssseg3:
2392 case Intrinsic::riscv_vssseg4:
2393 case Intrinsic::riscv_vssseg5:
2394 case Intrinsic::riscv_vssseg6:
2395 case Intrinsic::riscv_vssseg7:
2396 case Intrinsic::riscv_vssseg8: {
2397 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2398 /*IsStrided*/ true);
2399 return;
2400 }
2401 case Intrinsic::riscv_vssseg2_mask:
2402 case Intrinsic::riscv_vssseg3_mask:
2403 case Intrinsic::riscv_vssseg4_mask:
2404 case Intrinsic::riscv_vssseg5_mask:
2405 case Intrinsic::riscv_vssseg6_mask:
2406 case Intrinsic::riscv_vssseg7_mask:
2407 case Intrinsic::riscv_vssseg8_mask: {
2408 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2409 /*IsStrided*/ true);
2410 return;
2411 }
2412 case Intrinsic::riscv_vsoxseg2:
2413 case Intrinsic::riscv_vsoxseg3:
2414 case Intrinsic::riscv_vsoxseg4:
2415 case Intrinsic::riscv_vsoxseg5:
2416 case Intrinsic::riscv_vsoxseg6:
2417 case Intrinsic::riscv_vsoxseg7:
2418 case Intrinsic::riscv_vsoxseg8:
2419 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2420 /*IsOrdered*/ true);
2421 return;
2422 case Intrinsic::riscv_vsuxseg2:
2423 case Intrinsic::riscv_vsuxseg3:
2424 case Intrinsic::riscv_vsuxseg4:
2425 case Intrinsic::riscv_vsuxseg5:
2426 case Intrinsic::riscv_vsuxseg6:
2427 case Intrinsic::riscv_vsuxseg7:
2428 case Intrinsic::riscv_vsuxseg8:
2429 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2430 /*IsOrdered*/ false);
2431 return;
2432 case Intrinsic::riscv_vsoxseg2_mask:
2433 case Intrinsic::riscv_vsoxseg3_mask:
2434 case Intrinsic::riscv_vsoxseg4_mask:
2435 case Intrinsic::riscv_vsoxseg5_mask:
2436 case Intrinsic::riscv_vsoxseg6_mask:
2437 case Intrinsic::riscv_vsoxseg7_mask:
2438 case Intrinsic::riscv_vsoxseg8_mask:
2439 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2440 /*IsOrdered*/ true);
2441 return;
2442 case Intrinsic::riscv_vsuxseg2_mask:
2443 case Intrinsic::riscv_vsuxseg3_mask:
2444 case Intrinsic::riscv_vsuxseg4_mask:
2445 case Intrinsic::riscv_vsuxseg5_mask:
2446 case Intrinsic::riscv_vsuxseg6_mask:
2447 case Intrinsic::riscv_vsuxseg7_mask:
2448 case Intrinsic::riscv_vsuxseg8_mask:
2449 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2450 /*IsOrdered*/ false);
2451 return;
2452 case Intrinsic::riscv_vsoxei:
2453 case Intrinsic::riscv_vsoxei_mask:
2454 case Intrinsic::riscv_vsuxei:
2455 case Intrinsic::riscv_vsuxei_mask: {
2456 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2457 IntNo == Intrinsic::riscv_vsuxei_mask;
2458 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2459 IntNo == Intrinsic::riscv_vsoxei_mask;
2460
2461 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2462 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2463
2464 unsigned CurOp = 2;
2465 SmallVector<SDValue, 8> Operands;
2466 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2467
2468 MVT IndexVT;
2469 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2470 /*IsStridedOrIndexed*/ true, Operands,
2471 /*IsLoad=*/false, &IndexVT);
2472
2473 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2474 "Element count mismatch");
2475
2476 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2477 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2478 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2479 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2480 reportFatalUsageError("The V extension does not support EEW=64 for "
2481 "index values when XLEN=32");
2482 }
2483 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2484 IsMasked, IsOrdered, IndexLog2EEW,
2485 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2486 MachineSDNode *Store =
2487 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2488
2489 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2490
2491 ReplaceNode(Node, Store);
2492 return;
2493 }
2494 case Intrinsic::riscv_vsm:
2495 case Intrinsic::riscv_vse:
2496 case Intrinsic::riscv_vse_mask:
2497 case Intrinsic::riscv_vsse:
2498 case Intrinsic::riscv_vsse_mask: {
2499 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2500 IntNo == Intrinsic::riscv_vsse_mask;
2501 bool IsStrided =
2502 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2503
2504 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2505 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2506
2507 unsigned CurOp = 2;
2508 SmallVector<SDValue, 8> Operands;
2509 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2510
2511 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2512 Operands);
2513
2514 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2515 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2516 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2517 MachineSDNode *Store =
2518 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2519 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2520
2521 ReplaceNode(Node, Store);
2522 return;
2523 }
2524 case Intrinsic::riscv_sf_vc_x_se:
2525 case Intrinsic::riscv_sf_vc_i_se:
2526 selectSF_VC_X_SE(Node);
2527 return;
2528 case Intrinsic::riscv_sf_vlte8:
2529 case Intrinsic::riscv_sf_vlte16:
2530 case Intrinsic::riscv_sf_vlte32:
2531 case Intrinsic::riscv_sf_vlte64: {
2532 unsigned Log2SEW;
2533 unsigned PseudoInst;
2534 switch (IntNo) {
2535 case Intrinsic::riscv_sf_vlte8:
2536 PseudoInst = RISCV::PseudoSF_VLTE8;
2537 Log2SEW = 3;
2538 break;
2539 case Intrinsic::riscv_sf_vlte16:
2540 PseudoInst = RISCV::PseudoSF_VLTE16;
2541 Log2SEW = 4;
2542 break;
2543 case Intrinsic::riscv_sf_vlte32:
2544 PseudoInst = RISCV::PseudoSF_VLTE32;
2545 Log2SEW = 5;
2546 break;
2547 case Intrinsic::riscv_sf_vlte64:
2548 PseudoInst = RISCV::PseudoSF_VLTE64;
2549 Log2SEW = 6;
2550 break;
2551 }
2552
2553 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2554 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2555 SDValue Operands[] = {Node->getOperand(2),
2556 Node->getOperand(3),
2557 Node->getOperand(4),
2558 SEWOp,
2559 TWidenOp,
2560 Node->getOperand(0)};
2561
2562 MachineSDNode *TileLoad =
2563 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2564 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2565 CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()});
2566
2567 ReplaceNode(Node, TileLoad);
2568 return;
2569 }
2570 case Intrinsic::riscv_sf_mm_s_s:
2571 case Intrinsic::riscv_sf_mm_s_u:
2572 case Intrinsic::riscv_sf_mm_u_s:
2573 case Intrinsic::riscv_sf_mm_u_u:
2574 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2575 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2576 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2577 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2578 case Intrinsic::riscv_sf_mm_f_f: {
2579 bool HasFRM = false;
2580 unsigned PseudoInst;
2581 switch (IntNo) {
2582 case Intrinsic::riscv_sf_mm_s_s:
2583 PseudoInst = RISCV::PseudoSF_MM_S_S;
2584 break;
2585 case Intrinsic::riscv_sf_mm_s_u:
2586 PseudoInst = RISCV::PseudoSF_MM_S_U;
2587 break;
2588 case Intrinsic::riscv_sf_mm_u_s:
2589 PseudoInst = RISCV::PseudoSF_MM_U_S;
2590 break;
2591 case Intrinsic::riscv_sf_mm_u_u:
2592 PseudoInst = RISCV::PseudoSF_MM_U_U;
2593 break;
2594 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2595 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2596 HasFRM = true;
2597 break;
2598 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2599 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2600 HasFRM = true;
2601 break;
2602 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2603 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2604 HasFRM = true;
2605 break;
2606 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2607 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2608 HasFRM = true;
2609 break;
2610 case Intrinsic::riscv_sf_mm_f_f:
2611 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2612 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2613 else
2614 PseudoInst = RISCV::PseudoSF_MM_F_F;
2615 HasFRM = true;
2616 break;
2617 }
2618 uint64_t TileNum = Node->getConstantOperandVal(2);
2619 SDValue Op1 = Node->getOperand(3);
2620 SDValue Op2 = Node->getOperand(4);
2621 MVT VT = Op1->getSimpleValueType(0);
2622 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2623 SDValue TmOp = Node->getOperand(5);
2624 SDValue TnOp = Node->getOperand(6);
2625 SDValue TkOp = Node->getOperand(7);
2626 SDValue TWidenOp = Node->getOperand(8);
2627 SDValue Chain = Node->getOperand(0);
2628
2629 // sf.mm.f.f with sew=32, twiden=2 is invalid
2630 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2631 TWidenOp->getAsZExtVal() == 2)
2632 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2633
2634 SmallVector<SDValue, 10> Operands(
2635 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2636 if (HasFRM)
2637 Operands.push_back(
2638 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2639 Operands.append({TmOp, TnOp, TkOp,
2640 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2641 Chain});
2642
2643 auto *NewNode =
2644 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2645
2646 ReplaceNode(Node, NewNode);
2647 return;
2648 }
2649 case Intrinsic::riscv_sf_vtzero_t: {
2650 uint64_t TileNum = Node->getConstantOperandVal(2);
2651 SDValue Tm = Node->getOperand(3);
2652 SDValue Tn = Node->getOperand(4);
2653 SDValue Log2SEW = Node->getOperand(5);
2654 SDValue TWiden = Node->getOperand(6);
2655 SDValue Chain = Node->getOperand(0);
2656 auto *NewNode = CurDAG->getMachineNode(
2657 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2658 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2659 TWiden, Chain});
2660
2661 ReplaceNode(Node, NewNode);
2662 return;
2663 }
2664 }
2665 break;
2666 }
2667 case ISD::BITCAST: {
2668 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2669 // Just drop bitcasts between vectors if both are fixed or both are
2670 // scalable.
2671 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2672 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2673 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2674 CurDAG->RemoveDeadNode(Node);
2675 return;
2676 }
2677 if (Subtarget->enablePExtCodeGen()) {
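// With P-extension codegen, the small fixed-length vector types live in
// GPRs, so bitcasts between them and the same-sized scalar integer are
// no-ops.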
2678 bool Is32BitCast =
2679 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2680 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2681 bool Is64BitCast =
2682 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2683 SrcVT == MVT::v2i32)) ||
2684 (SrcVT == MVT::i64 &&
2685 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2686 if (Is32BitCast || Is64BitCast) {
2687 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2688 CurDAG->RemoveDeadNode(Node);
2689 return;
2690 }
2691 }
2692 break;
2693 }
2694 case ISD::INSERT_SUBVECTOR:
2695 case RISCVISD::TUPLE_INSERT: {
2696 SDValue V = Node->getOperand(0);
2697 SDValue SubV = Node->getOperand(1);
2698 SDLoc DL(SubV);
2699 auto Idx = Node->getConstantOperandVal(2);
2700 MVT SubVecVT = SubV.getSimpleValueType();
2701
2702 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2703 MVT SubVecContainerVT = SubVecVT;
2704 // Establish the correct scalable-vector types for any fixed-length type.
2705 if (SubVecVT.isFixedLengthVector()) {
2706 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2707 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2708 [[maybe_unused]] bool ExactlyVecRegSized =
2709 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2710 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2711 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2712 .getKnownMinValue()));
2713 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2714 }
2715 MVT ContainerVT = VT;
2716 if (VT.isFixedLengthVector())
2717 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2718
2719 const auto *TRI = Subtarget->getRegisterInfo();
2720 unsigned SubRegIdx;
2721 std::tie(SubRegIdx, Idx) =
2722 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2723 ContainerVT, SubVecContainerVT, Idx, TRI);
2724
2725 // If the Idx hasn't been completely eliminated then this is a subvector
2726 // insert which doesn't naturally align to a vector register. These must
2727 // be handled using instructions to manipulate the vector registers.
2728 if (Idx != 0)
2729 break;
2730
2731 RISCVVType::VLMUL SubVecLMUL =
2732 RISCVTargetLowering::getLMUL(SubVecContainerVT);
2733 [[maybe_unused]] bool IsSubVecPartReg =
2734 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2735 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2736 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2737 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2738 V.isUndef()) &&
2739 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2740 "the subvector is smaller than a full-sized register");
2741
2742 // If we haven't set a SubRegIdx, then we must be going between
2743 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2744 if (SubRegIdx == RISCV::NoSubRegister) {
2745 unsigned InRegClassID =
2746 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2747 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2748 InRegClassID &&
2749 "Unexpected subvector extraction");
2750 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2751 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2752 DL, VT, SubV, RC);
2753 ReplaceNode(Node, NewNode);
2754 return;
2755 }
2756
2757 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2758 ReplaceNode(Node, Insert.getNode());
2759 return;
2760 }
2761 case ISD::EXTRACT_SUBVECTOR:
2762 case RISCVISD::TUPLE_EXTRACT: {
2763 SDValue V = Node->getOperand(0);
2764 auto Idx = Node->getConstantOperandVal(1);
2765 MVT InVT = V.getSimpleValueType();
2766 SDLoc DL(V);
2767
2768 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2769 MVT SubVecContainerVT = VT;
2770 // Establish the correct scalable-vector types for any fixed-length type.
2771 if (VT.isFixedLengthVector()) {
2772 assert(Idx == 0);
2773 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2774 }
2775 if (InVT.isFixedLengthVector())
2776 InVT = TLI.getContainerForFixedLengthVector(InVT);
2777
2778 const auto *TRI = Subtarget->getRegisterInfo();
2779 unsigned SubRegIdx;
2780 std::tie(SubRegIdx, Idx) =
2781 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2782 InVT, SubVecContainerVT, Idx, TRI);
2783
2784 // If the Idx hasn't been completely eliminated then this is a subvector
2785 // extract which doesn't naturally align to a vector register. These must
2786 // be handled using instructions to manipulate the vector registers.
2787 if (Idx != 0)
2788 break;
2789
2790 // If we haven't set a SubRegIdx, then we must be going between
2791 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2792 if (SubRegIdx == RISCV::NoSubRegister) {
2793 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2794 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2795 InRegClassID &&
2796 "Unexpected subvector extraction");
2797 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2798 SDNode *NewNode =
2799 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2800 ReplaceNode(Node, NewNode);
2801 return;
2802 }
2803
2804 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2805 ReplaceNode(Node, Extract.getNode());
2806 return;
2807 }
2808 case RISCVISD::VMV_S_X_VL:
2809 case RISCVISD::VFMV_S_F_VL:
2810 case RISCVISD::VMV_V_X_VL:
2811 case RISCVISD::VFMV_V_F_VL: {
2812 // Try to match splat of a scalar load to a strided load with stride of x0.
2813 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2814 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2815 if (!Node->getOperand(0).isUndef())
2816 break;
2817 SDValue Src = Node->getOperand(1);
2818 auto *Ld = dyn_cast<LoadSDNode>(Src);
2819 // Can't fold an indexed load: its second output (the updated address)
2820 // is used, so the load node can't be removed.
2821 if (!Ld || Ld->isIndexed())
2822 break;
2823 EVT MemVT = Ld->getMemoryVT();
2824 // The memory VT should be the same size as the element type.
2825 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2826 break;
2827 if (!IsProfitableToFold(Src, Node, Node) ||
2828 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2829 break;
2830
2831 SDValue VL;
2832 if (IsScalarMove) {
2833 // We could deal with more VL if we update the VSETVLI insert pass to
2834 // avoid introducing more VSETVLI.
2835 if (!isOneConstant(Node->getOperand(2)))
2836 break;
2837 selectVLOp(Node->getOperand(2), VL);
2838 } else
2839 selectVLOp(Node->getOperand(2), VL);
2840
2841 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2842 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2843
2844 // If VL=1, then we don't need to do a strided load and can just do a
2845 // regular load.
2846 bool IsStrided = !isOneConstant(VL);
2847
2848 // Only do a strided load if we have optimized zero-stride vector load.
2849 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2850 break;
2851
2852 SmallVector<SDValue> Operands = {
2853 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2854 Ld->getBasePtr()};
2855 if (IsStrided)
2856 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
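// A stride of x0 loads every element from the same address, i.e. a
// broadcast of the scalar in memory.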
2857 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
2858 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2859 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2860
2861 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2862 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2863 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2864 Log2SEW, static_cast<unsigned>(LMUL));
2865 MachineSDNode *Load =
2866 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2867 // Update the chain.
2868 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2869 // Record the mem-refs
2870 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2871 // Replace the splat with the vlse.
2872 ReplaceNode(Node, Load);
2873 return;
2874 }
2875 case ISD::PREFETCH:
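// Lower prefetch locality 0-2 to a non-temporal locality hint (NTL.ALL,
// NTL.PALL or NTL.P1) on the memory operand; locality 3 keeps the plain
// prefetch selected below.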
2876 unsigned Locality = Node->getConstantOperandVal(3);
2877 if (Locality > 2)
2878 break;
2879
2880 auto *LoadStoreMem = cast<MemSDNode>(Node);
2881 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2882 MMO->setFlags(MachineMemOperand::MONonTemporal);
2883 
2884 int NontemporalLevel = 0;
2885 switch (Locality) {
2886 case 0:
2887 NontemporalLevel = 3; // NTL.ALL
2888 break;
2889 case 1:
2890 NontemporalLevel = 1; // NTL.PALL
2891 break;
2892 case 2:
2893 NontemporalLevel = 0; // NTL.P1
2894 break;
2895 default:
2896 llvm_unreachable("unexpected locality value.");
2897 }
2898
2899 if (NontemporalLevel & 0b1)
2900 MMO->setFlags(MONontemporalBit0);
2901 if (NontemporalLevel & 0b10)
2902 MMO->setFlags(MONontemporalBit1);
2903 break;
2904 }
2905
2906 // Select the default instruction.
2907 SelectCode(Node);
2908}
2909
2910 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2911 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2912 std::vector<SDValue> &OutOps) {
2913 // Always produce a register and immediate operand, as expected by
2914 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2915 switch (ConstraintID) {
2916 case InlineAsm::ConstraintCode::o:
2917 case InlineAsm::ConstraintCode::m: {
2918 SDValue Op0, Op1;
2919 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2920 assert(Found && "SelectAddrRegImm should always succeed");
2921 OutOps.push_back(Op0);
2922 OutOps.push_back(Op1);
2923 return false;
2924 }
2925 case InlineAsm::ConstraintCode::A:
2926 OutOps.push_back(Op);
2927 OutOps.push_back(
2928 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2929 return false;
2930 default:
2931 report_fatal_error("Unexpected asm memory constraint " +
2932 InlineAsm::getMemConstraintName(ConstraintID));
2933 }
2934
2935 return true;
2936}
2937
2938 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2939 SDValue &Offset) {
2940 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2941 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2942 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2943 return true;
2944 }
2945
2946 return false;
2947}
2948
2949// Fold constant addresses.
2950static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2951 const MVT VT, const RISCVSubtarget *Subtarget,
2952 SDValue Addr, SDValue &Base, SDValue &Offset,
2953 bool IsPrefetch = false) {
2954 if (!isa<ConstantSDNode>(Addr))
2955 return false;
2956
2957 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2958
2959 // If the constant is a simm12, we can fold the whole constant and use X0 as
2960 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2961 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
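// For example, CVal = 0x12345678 becomes LUI 0x12345 as the base with the
// remaining 0x678 folded into the memory offset.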
2962 int64_t Lo12 = SignExtend64<12>(CVal);
2963 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2964 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2965 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2966 return false;
2967 if (Hi) {
2968 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2969 Base = SDValue(
2970 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2971 CurDAG->getTargetConstant(Hi20, DL, VT)),
2972 0);
2973 } else {
2974 Base = CurDAG->getRegister(RISCV::X0, VT);
2975 }
2976 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2977 return true;
2978 }
2979
2980 // Ask how constant materialization would handle this constant.
2981 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2982
2983 // If the last instruction would be an ADDI, we can fold its immediate and
2984 // emit the rest of the sequence as the base.
2985 if (Seq.back().getOpcode() != RISCV::ADDI)
2986 return false;
2987 Lo12 = Seq.back().getImm();
2988 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2989 return false;
2990
2991 // Drop the last instruction.
2992 Seq.pop_back();
2993 assert(!Seq.empty() && "Expected more instructions in sequence");
2994
2995 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2996 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2997 return true;
2998}
2999
3000// Is this ADD instruction only used as the base pointer of scalar loads and
3001// stores?
3002 static bool isWorthFoldingAdd(SDValue Add) {
3003 for (auto *User : Add->users()) {
3004 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3005 User->getOpcode() != RISCVISD::LD_RV32 &&
3006 User->getOpcode() != RISCVISD::SD_RV32 &&
3007 User->getOpcode() != ISD::ATOMIC_LOAD &&
3008 User->getOpcode() != ISD::ATOMIC_STORE)
3009 return false;
3010 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3011 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3012 VT != MVT::f64)
3013 return false;
3014 // Don't allow stores of the value. It must be used as the address.
3015 if (User->getOpcode() == ISD::STORE &&
3016 cast<StoreSDNode>(User)->getValue() == Add)
3017 return false;
3018 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3019 cast<AtomicSDNode>(User)->getVal() == Add)
3020 return false;
3021 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3022 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3023 return false;
3024 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3025 return false;
3026 }
3027
3028 return true;
3029}
3030
3031 static bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3032 switch (User->getOpcode()) {
3033 default:
3034 return false;
3035 case ISD::LOAD:
3036 case RISCVISD::LD_RV32:
3037 case ISD::ATOMIC_LOAD:
3038 break;
3039 case ISD::STORE:
3040 // Don't allow stores of Add. It must only be used as the address.
3041 if (cast<StoreSDNode>(User)->getValue() == Add)
3042 return false;
3043 break;
3044 case RISCVISD::SD_RV32:
3045 // Don't allow stores of Add. It must only be used as the address.
3046 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3047 return false;
3048 break;
3049 case ISD::ATOMIC_STORE:
3050 // Don't allow stores of Add. It must only be used as the address.
3051 if (cast<AtomicSDNode>(User)->getVal() == Add)
3052 return false;
3053 break;
3054 }
3055
3056 return true;
3057}
3058
3059// To prevent SelectAddrRegImm from folding offsets that conflict with the
3060// fusion of PseudoMovAddr, check if the offset of every use of a given address
3061// is within the alignment.
3062 bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3063 Align Alignment) {
3064 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3065 for (auto *User : Addr->users()) {
3066 // If the user is a load or store, then the offset is 0 which is always
3067 // within alignment.
3068 if (isRegImmLoadOrStore(User, Addr))
3069 continue;
3070
3071 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3072 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3073 if (!isInt<12>(CVal) || Alignment <= CVal)
3074 return false;
3075
3076 // Make sure all uses are foldable load/stores.
3077 for (auto *AddUser : User->users())
3078 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3079 return false;
3080
3081 continue;
3082 }
3083
3084 return false;
3085 }
3086
3087 return true;
3088}
3089
3090 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
3091 SDValue &Offset) {
3092 if (SelectAddrFrameIndex(Addr, Base, Offset))
3093 return true;
3094
3095 SDLoc DL(Addr);
3096 MVT VT = Addr.getSimpleValueType();
3097
3098 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3099 bool CanFold = true;
3100 // Unconditionally fold if operand 1 is not a global address (e.g. an
3101 // external symbol).
3102 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3103 const DataLayout &DL = CurDAG->getDataLayout();
3104 Align Alignment = commonAlignment(
3105 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3106 if (!areOffsetsWithinAlignment(Addr, Alignment))
3107 CanFold = false;
3108 }
3109 if (CanFold) {
3110 Base = Addr.getOperand(0);
3111 Offset = Addr.getOperand(1);
3112 return true;
3113 }
3114 }
3115
3116 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3117 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3118 if (isInt<12>(CVal)) {
3119 Base = Addr.getOperand(0);
3120 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3121 SDValue LoOperand = Base.getOperand(1);
3122 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3123 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3124 // (its low part, really), then we can rely on the alignment of that
3125 // variable to provide a margin of safety before the low part can overflow
3126 // the 12 bits of the load/store offset. Check if CVal falls within
3127 // that margin; if so (low part + CVal) can't overflow.
3128 const DataLayout &DL = CurDAG->getDataLayout();
3129 Align Alignment = commonAlignment(
3130 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3131 if ((CVal == 0 || Alignment > CVal) &&
3132 areOffsetsWithinAlignment(Base, Alignment)) {
3133 int64_t CombinedOffset = CVal + GA->getOffset();
3134 Base = Base.getOperand(0);
3135 Offset = CurDAG->getTargetGlobalAddress(
3136 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3137 CombinedOffset, GA->getTargetFlags());
3138 return true;
3139 }
3140 }
3141 }
3142
3143 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3144 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3145 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3146 return true;
3147 }
3148 }
3149
3150 // Handle ADD with large immediates.
3151 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3152 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3153 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3154
3155 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3156 // an ADDI for part of the offset and fold the rest into the load/store.
3157 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
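// For example, an offset of 3000 becomes ADDI base, 2047 with the
// remaining 953 folded into the load/store immediate.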
3158 if (CVal >= -4096 && CVal <= 4094) {
3159 int64_t Adj = CVal < 0 ? -2048 : 2047;
3160 Base = SDValue(
3161 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3162 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3163 0);
3164 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3165 return true;
3166 }
3167
3168 // For larger immediates, we might be able to save one instruction from
3169 // constant materialization by folding the Lo12 bits of the immediate into
3170 // the address. We should only do this if the ADD is only used by loads and
3171 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3172 // separately with the full materialized immediate creating extra
3173 // instructions.
3174 if (isWorthFoldingAdd(Addr) &&
3175 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3176 Offset, /*IsPrefetch=*/false)) {
3177 // Insert an ADD instruction with the materialized Hi52 bits.
3178 Base = SDValue(
3179 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3180 0);
3181 return true;
3182 }
3183 }
3184
3185 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3186 /*IsPrefetch=*/false))
3187 return true;
3188
3189 Base = Addr;
3190 Offset = CurDAG->getTargetConstant(0, DL, VT);
3191 return true;
3192}
3193
3194/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3196 SDValue &Offset) {
3197 if (SelectAddrFrameIndex(Addr, Base, Offset))
3198 return true;
3199
3200 SDLoc DL(Addr);
3201 MVT VT = Addr.getSimpleValueType();
3202
3203 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3204 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3205 if (isUInt<9>(CVal)) {
3206 Base = Addr.getOperand(0);
3207
3208 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3209 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3210 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3211 return true;
3212 }
3213 }
3214
3215 Base = Addr;
3216 Offset = CurDAG->getTargetConstant(0, DL, VT);
3217 return true;
3218}
3219
3220/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3221/// Offset should be all zeros.
3222 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3223 SDValue &Offset) {
3224 if (SelectAddrFrameIndex(Addr, Base, Offset))
3225 return true;
3226
3227 SDLoc DL(Addr);
3228 MVT VT = Addr.getSimpleValueType();
3229
3230 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3231 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3232 if (isInt<12>(CVal)) {
3233 Base = Addr.getOperand(0);
3234
3235 // Early-out if not a valid offset.
3236 if ((CVal & 0b11111) != 0) {
3237 Base = Addr;
3238 Offset = CurDAG->getTargetConstant(0, DL, VT);
3239 return true;
3240 }
3241
3242 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3243 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3244 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3245 return true;
3246 }
3247 }
3248
3249 // Handle ADD with large immediates.
3250 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3251 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3252 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3253
3254 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3255 // one instruction by folding an adjustment (-2048 or 2016) into the address.
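// Both adjustments have their low five bits clear, so the offset that
// remains in the memory operand keeps the alignment the prefetch
// instructions require.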
3256 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3257 int64_t Adj = CVal < 0 ? -2048 : 2016;
3258 int64_t AdjustedOffset = CVal - Adj;
3259 Base =
3260 SDValue(CurDAG->getMachineNode(
3261 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3262 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3263 0);
3264 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3265 return true;
3266 }
3267
3268 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3269 Offset, /*IsPrefetch=*/true)) {
3270 // Insert an ADD instruction with the materialized Hi52 bits.
3271 Base = SDValue(
3272 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3273 0);
3274 return true;
3275 }
3276 }
3277
3278 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3279 /*IsPrefetch=*/true))
3280 return true;
3281
3282 Base = Addr;
3283 Offset = CurDAG->getTargetConstant(0, DL, VT);
3284 return true;
3285}
3286
3287 /// Return true if this is a load/store that we have a RegRegScale instruction for.
3288 static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3289 const RISCVSubtarget &Subtarget) {
3290 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3291 return false;
3292 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3293 if (!(VT.isScalarInteger() &&
3294 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3295 !((VT == MVT::f32 || VT == MVT::f64) &&
3296 Subtarget.hasVendorXTHeadFMemIdx()))
3297 return false;
3298 // Don't allow stores of the value. It must be used as the address.
3299 if (User->getOpcode() == ISD::STORE &&
3300 cast<StoreSDNode>(User)->getValue() == Add)
3301 return false;
3302
3303 return true;
3304}
3305
3306/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3307/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3308/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3309/// single addi and we don't have a SHXADD instruction we could use.
3310/// FIXME: May still need to check how many and what kind of users the SHL has.
3311static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3312 SDValue Add,
3313 SDValue Shift = SDValue()) {
3314 bool FoundADDI = false;
3315 for (auto *User : Add->users()) {
3316 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3317 continue;
3318
3319 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3320 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3321 !isa<ConstantSDNode>(User->getOperand(1)) ||
3322 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3323 return false;
3324
3325 FoundADDI = true;
3326
3327 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3328 assert(Shift.getOpcode() == ISD::SHL);
3329 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3330 if (Subtarget.hasShlAdd(ShiftAmt))
3331 return false;
3332
3333 // All users of the ADDI should be load/store.
3334 for (auto *ADDIUser : User->users())
3335 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3336 return false;
3337 }
3338
3339 return true;
3340}
3341
3342bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3343 unsigned MaxShiftAmount,
3344 SDValue &Base, SDValue &Index,
3345 SDValue &Scale) {
3346 if (Addr.getOpcode() != ISD::ADD)
3347 return false;
3348 SDValue LHS = Addr.getOperand(0);
3349 SDValue RHS = Addr.getOperand(1);
3350
3351 EVT VT = Addr.getSimpleValueType();
3352 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3353 SDValue &Shift) {
3354 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3355 return false;
3356
3357 // Only match shifts by a value in range [0, MaxShiftAmount].
3358 unsigned ShiftAmt = N.getConstantOperandVal(1);
3359 if (ShiftAmt > MaxShiftAmount)
3360 return false;
3361
3362 Index = N.getOperand(0);
3363 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3364 return true;
3365 };
3366
3367 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3368 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3369 if (LHS.getOpcode() == ISD::ADD &&
3370 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3371 isInt<12>(C1->getSExtValue())) {
3372 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3373 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3374 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3375 SDLoc(Addr), VT);
3376 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3377 LHS.getOperand(0), C1Val),
3378 0);
3379 return true;
3380 }
3381
3382 // Add is commutative so we need to check both operands.
3383 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3384 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3385 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3386 SDLoc(Addr), VT);
3387 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3388 LHS.getOperand(1), C1Val),
3389 0);
3390 return true;
3391 }
3392 }
3393
3394 // Don't match add with constants.
3395 // FIXME: Is this profitable for large constants that have 0s in the lower
3396 // 12 bits that we can materialize with LUI?
3397 return false;
3398 }
3399
3400 // Try to match a shift on the RHS.
3401 if (SelectShl(RHS, Index, Scale)) {
3402 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3403 return false;
3404 Base = LHS;
3405 return true;
3406 }
3407
3408 // Try to match a shift on the LHS.
3409 if (SelectShl(LHS, Index, Scale)) {
3410 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3411 return false;
3412 Base = RHS;
3413 return true;
3414 }
3415
3416 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3417 return false;
3418
3419 Base = LHS;
3420 Index = RHS;
3421 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3422 return true;
3423}
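// Worked example (illustrative): (add A, (shl B, 2)) yields Base = A,
// Index = B, Scale = 2 when 2 <= MaxShiftAmount and the fold is judged
// profitable; a plain (add A, B) still matches with Scale = 0.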
3424
3425bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3426 unsigned MaxShiftAmount,
3427 unsigned Bits, SDValue &Base,
3428 SDValue &Index,
3429 SDValue &Scale) {
3430 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3431 return false;
3432
3433 if (Index.getOpcode() == ISD::AND) {
3434 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3435 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3436 Index = Index.getOperand(0);
3437 return true;
3438 }
3439 }
3440
3441 return false;
3442}
3443
3444bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3445 SDValue &Offset) {
3446 if (Addr.getOpcode() != ISD::ADD)
3447 return false;
3448
3449 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3450 return false;
3451
3452 Base = Addr.getOperand(0);
3453 Offset = Addr.getOperand(1);
3454 return true;
3455}
3456
3457bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3458 SDValue &ShAmt) {
3459 ShAmt = N;
3460
3461 // Peek through zext.
3462 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3463 ShAmt = ShAmt.getOperand(0);
3464
3465 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3466 // amount. If there is an AND on the shift amount, we can bypass it if it
3467 // doesn't affect any of those bits.
3468 if (ShAmt.getOpcode() == ISD::AND &&
3469 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3470 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3471
3472 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3473 // mask that covers the bits needed to represent all shift amounts.
3474 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3475 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3476
3477 if (ShMask.isSubsetOf(AndMask)) {
3478 ShAmt = ShAmt.getOperand(0);
3479 } else {
3480 // SimplifyDemandedBits may have optimized the mask so try restoring any
3481 // bits that are known zero.
3482 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3483 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3484 return true;
3485 ShAmt = ShAmt.getOperand(0);
3486 }
3487 }
3488
3489 if (ShAmt.getOpcode() == ISD::ADD &&
3490 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3491 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3492 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3493 // to avoid the ADD.
3494 if (Imm != 0 && Imm % ShiftWidth == 0) {
3495 ShAmt = ShAmt.getOperand(0);
3496 return true;
3497 }
3498 } else if (ShAmt.getOpcode() == ISD::SUB &&
3499 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3500 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3501 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3502 // generate a NEG instead of a SUB of a constant.
3503 if (Imm != 0 && Imm % ShiftWidth == 0) {
3504 SDLoc DL(ShAmt);
3505 EVT VT = ShAmt.getValueType();
3506 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3507 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3508 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3509 ShAmt.getOperand(1));
3510 ShAmt = SDValue(Neg, 0);
3511 return true;
3512 }
3513 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3514 // to generate a NOT instead of a SUB of a constant.
3515 if (Imm % ShiftWidth == ShiftWidth - 1) {
3516 SDLoc DL(ShAmt);
3517 EVT VT = ShAmt.getValueType();
3518 MachineSDNode *Not = CurDAG->getMachineNode(
3519 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3520 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3521 ShAmt = SDValue(Not, 0);
3522 return true;
3523 }
3524 }
3525
3526 return true;
3527}
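// Worked examples (illustrative), for ShiftWidth == 64 on RV64:
//   (and Y, 63)  -> Y            (the AND covers every demanded bit)
//   (sub 64, Y)  -> (subw x0, Y) (shift by -Y, since 64 % 64 == 0)
//   (sub 63, Y)  -> (xori Y, -1) (shift by ~Y, since 63 % 64 == 63)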
3528
3529/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3530/// check for equality with 0. This function emits instructions that convert the
3531/// seteq/setne into something that can be compared with 0.
3532/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3533/// ISD::SETNE).
3534bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3535 SDValue &Val) {
3536 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3537 "Unexpected condition code!");
3538
3539 // We're looking for a setcc.
3540 if (N->getOpcode() != ISD::SETCC)
3541 return false;
3542
3543 // Must be an equality comparison.
3544 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3545 if (CCVal != ExpectedCCVal)
3546 return false;
3547
3548 SDValue LHS = N->getOperand(0);
3549 SDValue RHS = N->getOperand(1);
3550
3551 if (!LHS.getValueType().isScalarInteger())
3552 return false;
3553
3554 // If the RHS is 0, we don't need any extra instructions; return the LHS.
3555 if (isNullConstant(RHS)) {
3556 Val = LHS;
3557 return true;
3558 }
3559
3560 SDLoc DL(N);
3561
3562 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3563 int64_t CVal = C->getSExtValue();
3564 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3565 // non-zero otherwise.
3566 if (CVal == -2048) {
3567 Val = SDValue(
3568 CurDAG->getMachineNode(
3569 RISCV::XORI, DL, N->getValueType(0), LHS,
3570 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3571 0);
3572 return true;
3573 }
3574 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3575 // if the LHS is equal to the RHS and non-zero otherwise.
3576 if (isInt<12>(CVal) || CVal == 2048) {
3577 unsigned Opc = RISCV::ADDI;
3578 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3579 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3580 Opc = RISCV::ADDIW;
3581 LHS = LHS.getOperand(0);
3582 }
3583
3584 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3585 CurDAG->getSignedTargetConstant(
3586 -CVal, DL, N->getValueType(0))),
3587 0);
3588 return true;
3589 }
3590 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3591 Val = SDValue(
3592 CurDAG->getMachineNode(
3593 RISCV::BINVI, DL, N->getValueType(0), LHS,
3594 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3595 0);
3596 return true;
3597 }
3598 // Same as the addi case above but for larger immediates (signed 26-bit) use
3599 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3600 // anything which can be done with a single lui as it might be compressible.
3601 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3602 (CVal & 0xFFF) != 0) {
3603 Val = SDValue(
3604 CurDAG->getMachineNode(
3605 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3606 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3607 0);
3608 return true;
3609 }
3610 }
3611
3612 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3613 // equal and a non-zero value if they aren't.
3614 Val = SDValue(
3615 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3616 return true;
3617}
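// Worked example (illustrative): (setne X, 5) is lowered to ADDI t, X, -5 so
// the caller only needs to test t against zero; (setne X, 4096) uses
// BINVI t, X, 12 when Zbs is available; other constants fall back to
// XOR X, RHS.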
3618
3619bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3620 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3621 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3622 Val = N.getOperand(0);
3623 return true;
3624 }
3625
3626 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3627 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3628 return N;
3629
3630 SDValue N0 = N.getOperand(0);
3631 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3632 N.getConstantOperandVal(1) == ShiftAmt &&
3633 N0.getConstantOperandVal(1) == ShiftAmt)
3634 return N0.getOperand(0);
3635
3636 return N;
3637 };
3638
3639 MVT VT = N.getSimpleValueType();
3640 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3641 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3642 return true;
3643 }
3644
3645 return false;
3646}
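// Worked example (illustrative): selectSExtBits(N, 32) accepts
// (sext_inreg X, i32) and returns X, and also unwraps the equivalent
// (sra (shl X, 32), 32) pair; any value the DAG already knows is
// sign-extended from 32 bits is accepted unchanged.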
3647
3648bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3649 if (N.getOpcode() == ISD::AND) {
3650 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3651 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3652 Val = N.getOperand(0);
3653 return true;
3654 }
3655 }
3656 MVT VT = N.getSimpleValueType();
3657 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3658 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3659 Val = N;
3660 return true;
3661 }
3662
3663 return false;
3664}
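// Worked example (illustrative): selectZExtBits(N, 8) strips an explicit
// (and X, 255) down to X, and also accepts any X whose bits above bit 7 are
// already known to be zero (for example, the result of a byte zextload).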
3665
3666/// Look for various patterns that can be done with a SHL that can be folded
3667/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3668/// SHXADD we are trying to match.
3669bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3670 SDValue &Val) {
3671 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3672 SDValue N0 = N.getOperand(0);
3673
3674 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3675 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3676 isa<ConstantSDNode>(N0.getOperand(1))) {
3677 uint64_t Mask = N.getConstantOperandVal(1);
3678 unsigned C2 = N0.getConstantOperandVal(1);
3679
3680 unsigned XLen = Subtarget->getXLen();
3681 if (LeftShift)
3682 Mask &= maskTrailingZeros<uint64_t>(C2);
3683 else
3684 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3685
3686 if (isShiftedMask_64(Mask)) {
3687 unsigned Leading = XLen - llvm::bit_width(Mask);
3688 unsigned Trailing = llvm::countr_zero(Mask);
3689 if (Trailing != ShAmt)
3690 return false;
3691
3692 unsigned Opcode;
3693 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3694 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3695 // followed by a SHXADD with c3 for the X amount.
3696 if (LeftShift && Leading == 0 && C2 < Trailing)
3697 Opcode = RISCV::SRLI;
3698 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3699 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3700 // followed by a SHXADD with c3 for the X amount.
3701 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3702 Opcode = RISCV::SRLIW;
3703 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3704 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3705 // followed by a SHXADD using c3 for the X amount.
3706 else if (!LeftShift && Leading == C2)
3707 Opcode = RISCV::SRLI;
3708 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3709 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3710 // followed by a SHXADD using c3 for the X amount.
3711 else if (!LeftShift && Leading == 32 + C2)
3712 Opcode = RISCV::SRLIW;
3713 else
3714 return false;
3715
3716 SDLoc DL(N);
3717 EVT VT = N.getValueType();
3718 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3719 Val = SDValue(
3720 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
3721 CurDAG->getTargetConstant(ShAmt, DL, VT)),
3722 0);
3723 return true;
3724 }
3725 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3726 isa<ConstantSDNode>(N0.getOperand(1))) {
3727 uint64_t Mask = N.getConstantOperandVal(1);
3728 unsigned C2 = N0.getConstantOperandVal(1);
3729
3730 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3731 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3732 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3733 // the X amount.
3734 if (isShiftedMask_64(Mask)) {
3735 unsigned XLen = Subtarget->getXLen();
3736 unsigned Leading = XLen - llvm::bit_width(Mask);
3737 unsigned Trailing = llvm::countr_zero(Mask);
3738 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3739 SDLoc DL(N);
3740 EVT VT = N.getValueType();
3741 Val = SDValue(CurDAG->getMachineNode(
3742 RISCV::SRAI, DL, VT, N0.getOperand(0),
3743 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3744 0);
3745 Val = SDValue(CurDAG->getMachineNode(
3746 RISCV::SRLI, DL, VT, Val,
3747 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3748 0);
3749 return true;
3750 }
3751 }
3752 }
3753 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3754 (LeftShift || N.getOpcode() == ISD::SRL) &&
3755 isa<ConstantSDNode>(N.getOperand(1))) {
3756 SDValue N0 = N.getOperand(0);
3757 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3758 isa<ConstantSDNode>(N0.getOperand(1))) {
3759 uint64_t Mask = N0.getConstantOperandVal(1);
3760 if (isShiftedMask_64(Mask)) {
3761 unsigned C1 = N.getConstantOperandVal(1);
3762 unsigned XLen = Subtarget->getXLen();
3763 unsigned Leading = XLen - llvm::bit_width(Mask);
3764 unsigned Trailing = llvm::countr_zero(Mask);
3765 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3766 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3767 if (LeftShift && Leading == 32 && Trailing > 0 &&
3768 (Trailing + C1) == ShAmt) {
3769 SDLoc DL(N);
3770 EVT VT = N.getValueType();
3771 Val = SDValue(CurDAG->getMachineNode(
3772 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3773 CurDAG->getTargetConstant(Trailing, DL, VT)),
3774 0);
3775 return true;
3776 }
3777 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3778 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3779 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3780 (Trailing - C1) == ShAmt) {
3781 SDLoc DL(N);
3782 EVT VT = N.getValueType();
3783 Val = SDValue(CurDAG->getMachineNode(
3784 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3785 CurDAG->getTargetConstant(Trailing, DL, VT)),
3786 0);
3787 return true;
3788 }
3789 }
3790 }
3791 }
3792
3793 return false;
3794}
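// Worked example (illustrative): when matching SH3ADD (ShAmt == 3), an
// operand of the form (shl (and X, 0xFFFFFFFC), 1) is rewritten to
// (srliw X, 2); the SH3ADD's shift by 3 then restores the net shl-by-1 of
// the masked field. This assumes the inner AND has a single use.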
3795
3796/// Look for various patterns that can be done with a SHL that can be folded
3797/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3798/// SHXADD_UW we are trying to match.
3799bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3800 SDValue &Val) {
3801 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3802 N.hasOneUse()) {
3803 SDValue N0 = N.getOperand(0);
3804 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3805 N0.hasOneUse()) {
3806 uint64_t Mask = N.getConstantOperandVal(1);
3807 unsigned C2 = N0.getConstantOperandVal(1);
3808
3809 Mask &= maskTrailingZeros<uint64_t>(C2);
3810
3811 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3812 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3813 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3814 if (isShiftedMask_64(Mask)) {
3815 unsigned Leading = llvm::countl_zero(Mask);
3816 unsigned Trailing = llvm::countr_zero(Mask);
3817 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3818 SDLoc DL(N);
3819 EVT VT = N.getValueType();
3820 Val = SDValue(CurDAG->getMachineNode(
3821 RISCV::SLLI, DL, VT, N0.getOperand(0),
3822 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3823 0);
3824 return true;
3825 }
3826 }
3827 }
3828 }
3829
3830 return false;
3831}
3832
3833bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
3834 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
3835 if (N->getFlags().hasDisjoint())
3836 return true;
3837 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
3838}
3839
3840bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3841 SDValue N, SDValue &Val) {
3842 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
3843 /*CompressionCost=*/true);
3844 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3845 /*CompressionCost=*/true);
3846 if (OrigCost <= Cost)
3847 return false;
3848
3849 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
3850 return true;
3851}
3852
3853bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3854 if (!isa<ConstantSDNode>(N))
3855 return false;
3856 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3857 if ((Imm >> 31) != 1)
3858 return false;
3859
3860 for (const SDNode *U : N->users()) {
3861 switch (U->getOpcode()) {
3862 case ISD::ADD:
3863 break;
3864 case ISD::OR:
3865 if (orDisjoint(U))
3866 break;
3867 return false;
3868 default:
3869 return false;
3870 }
3871 }
3872
3873 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
3874}
3875
3876bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3877 if (!isa<ConstantSDNode>(N))
3878 return false;
3879 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3880 if (isInt<32>(Imm))
3881 return false;
3882
3883 for (const SDNode *U : N->users()) {
3884 switch (U->getOpcode()) {
3885 case ISD::ADD:
3886 break;
3887 case RISCVISD::VMV_V_X_VL:
3888 if (!all_of(U->users(), [](const SDNode *V) {
3889 return V->getOpcode() == ISD::ADD ||
3890 V->getOpcode() == RISCVISD::ADD_VL;
3891 }))
3892 return false;
3893 break;
3894 default:
3895 return false;
3896 }
3897 }
3898
3899 return selectImm64IfCheaper(-Imm, Imm, N, Val);
3900}
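// Worked example (illustrative): a 64-bit constant such as
// 0xFFFFFFFF00000001 typically costs three instructions to materialize,
// while its negation 0xFFFFFFFF costs two (ADDI -1; SRLI 32), so an ADD of
// the original constant can instead be selected as a SUB of the negated one.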
3901
3902bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3903 if (!isa<ConstantSDNode>(N))
3904 return false;
3905 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3906
3907 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3908 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3909 return false;
3910
3911 // Abandon this transform if the constant is needed elsewhere.
3912 for (const SDNode *U : N->users()) {
3913 switch (U->getOpcode()) {
3914 case ISD::AND:
3915 case ISD::OR:
3916 case ISD::XOR:
3917 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3918 return false;
3919 break;
3920 case RISCVISD::VMV_V_X_VL:
3921 if (!Subtarget->hasStdExtZvkb())
3922 return false;
3923 if (!all_of(U->users(), [](const SDNode *V) {
3924 return V->getOpcode() == ISD::AND ||
3925 V->getOpcode() == RISCVISD::AND_VL;
3926 }))
3927 return false;
3928 break;
3929 default:
3930 return false;
3931 }
3932 }
3933
3934 if (isInt<32>(Imm)) {
3935 Val =
3936 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3937 return true;
3938 }
3939
3940 // For 64-bit constants, the instruction sequences get complex,
3941 // so we select inverted only if it's cheaper.
3942 return selectImm64IfCheaper(~Imm, Imm, N, Val);
3943}
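// Worked example (illustrative): for (and X, 0xFFFFFFFFFFFF0FFF) with Zbb,
// the inverted constant 0xF000 is a single LUI, so the operation can be
// selected as ANDN against that value instead of materializing the original
// constant with a longer LUI+ADDIW sequence.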
3944
3945static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3946 unsigned Bits,
3947 const TargetInstrInfo *TII) {
3948 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3949
3950 if (!MCOpcode)
3951 return false;
3952
3953 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3954 const uint64_t TSFlags = MCID.TSFlags;
3955 if (!RISCVII::hasSEWOp(TSFlags))
3956 return false;
3957 assert(RISCVII::hasVLOp(TSFlags));
3958
3959 unsigned ChainOpIdx = User->getNumOperands() - 1;
3960 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3961 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3962 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
3963 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3964
3965 if (UserOpNo == VLIdx)
3966 return false;
3967
3968 auto NumDemandedBits =
3969 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3970 return NumDemandedBits && Bits >= *NumDemandedBits;
3971}
3972
3973// Return true if all users of this SDNode* only consume the lower \p Bits.
3974// This can be used to form W instructions for add/sub/mul/shl even when the
3975// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3976// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3977// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3978// the add/sub/mul/shl to become non-W instructions. By checking the users we
3979// may be able to use a W instruction and CSE with the other instruction if
3980// this has happened. We could try to detect that the CSE opportunity exists
3981// before doing this, but that would be more complicated.
3982bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3983 const unsigned Depth) const {
3984 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3985 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3986 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3987 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3988 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3989 isa<ConstantSDNode>(Node) || Depth != 0) &&
3990 "Unexpected opcode");
3991
3992 if (Depth >= SelectionDAG::MaxRecursionDepth)
3993 return false;
3994
3995 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3996 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3997 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3998 return false;
3999
4000 for (SDUse &Use : Node->uses()) {
4001 SDNode *User = Use.getUser();
4002 // Users of this node should have already been instruction selected
4003 if (!User->isMachineOpcode())
4004 return false;
4005
4006 // TODO: Add more opcodes?
4007 switch (User->getMachineOpcode()) {
4008 default:
4009 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
4010 break;
4011 return false;
4012 case RISCV::ADDW:
4013 case RISCV::ADDIW:
4014 case RISCV::SUBW:
4015 case RISCV::MULW:
4016 case RISCV::SLLW:
4017 case RISCV::SLLIW:
4018 case RISCV::SRAW:
4019 case RISCV::SRAIW:
4020 case RISCV::SRLW:
4021 case RISCV::SRLIW:
4022 case RISCV::DIVW:
4023 case RISCV::DIVUW:
4024 case RISCV::REMW:
4025 case RISCV::REMUW:
4026 case RISCV::ROLW:
4027 case RISCV::RORW:
4028 case RISCV::RORIW:
4029 case RISCV::CLZW:
4030 case RISCV::CTZW:
4031 case RISCV::CPOPW:
4032 case RISCV::SLLI_UW:
4033 case RISCV::ABSW:
4034 case RISCV::FMV_W_X:
4035 case RISCV::FCVT_H_W:
4036 case RISCV::FCVT_H_W_INX:
4037 case RISCV::FCVT_H_WU:
4038 case RISCV::FCVT_H_WU_INX:
4039 case RISCV::FCVT_S_W:
4040 case RISCV::FCVT_S_W_INX:
4041 case RISCV::FCVT_S_WU:
4042 case RISCV::FCVT_S_WU_INX:
4043 case RISCV::FCVT_D_W:
4044 case RISCV::FCVT_D_W_INX:
4045 case RISCV::FCVT_D_WU:
4046 case RISCV::FCVT_D_WU_INX:
4047 case RISCV::TH_REVW:
4048 case RISCV::TH_SRRIW:
4049 if (Bits >= 32)
4050 break;
4051 return false;
4052 case RISCV::SLL:
4053 case RISCV::SRA:
4054 case RISCV::SRL:
4055 case RISCV::ROL:
4056 case RISCV::ROR:
4057 case RISCV::BSET:
4058 case RISCV::BCLR:
4059 case RISCV::BINV:
4060 // Shift amount operands only use log2(Xlen) bits.
4061 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4062 break;
4063 return false;
4064 case RISCV::SLLI:
4065 // SLLI only uses the lower (XLen - ShAmt) bits.
4066 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4067 break;
4068 return false;
4069 case RISCV::ANDI:
4070 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4071 break;
4072 goto RecCheck;
4073 case RISCV::ORI: {
4074 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4075 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4076 break;
4077 [[fallthrough]];
4078 }
4079 case RISCV::AND:
4080 case RISCV::OR:
4081 case RISCV::XOR:
4082 case RISCV::XORI:
4083 case RISCV::ANDN:
4084 case RISCV::ORN:
4085 case RISCV::XNOR:
4086 case RISCV::SH1ADD:
4087 case RISCV::SH2ADD:
4088 case RISCV::SH3ADD:
4089 RecCheck:
4090 if (hasAllNBitUsers(User, Bits, Depth + 1))
4091 break;
4092 return false;
4093 case RISCV::SRLI: {
4094 unsigned ShAmt = User->getConstantOperandVal(1);
4095 // If we are shifting right by less than Bits, and users don't demand any
4096 // bits that were shifted into [Bits-1:0], then we can consider this as an
4097 // N-Bit user.
4098 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4099 break;
4100 return false;
4101 }
4102 case RISCV::SEXT_B:
4103 case RISCV::PACKH:
4104 if (Bits >= 8)
4105 break;
4106 return false;
4107 case RISCV::SEXT_H:
4108 case RISCV::FMV_H_X:
4109 case RISCV::ZEXT_H_RV32:
4110 case RISCV::ZEXT_H_RV64:
4111 case RISCV::PACKW:
4112 if (Bits >= 16)
4113 break;
4114 return false;
4115 case RISCV::PACK:
4116 if (Bits >= (Subtarget->getXLen() / 2))
4117 break;
4118 return false;
4119 case RISCV::ADD_UW:
4120 case RISCV::SH1ADD_UW:
4121 case RISCV::SH2ADD_UW:
4122 case RISCV::SH3ADD_UW:
4123 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4124 // 32 bits.
4125 if (Use.getOperandNo() == 0 && Bits >= 32)
4126 break;
4127 return false;
4128 case RISCV::SB:
4129 if (Use.getOperandNo() == 0 && Bits >= 8)
4130 break;
4131 return false;
4132 case RISCV::SH:
4133 if (Use.getOperandNo() == 0 && Bits >= 16)
4134 break;
4135 return false;
4136 case RISCV::SW:
4137 if (Use.getOperandNo() == 0 && Bits >= 32)
4138 break;
4139 return false;
4140 case RISCV::TH_EXT:
4141 case RISCV::TH_EXTU: {
4142 unsigned Msb = User->getConstantOperandVal(1);
4143 unsigned Lsb = User->getConstantOperandVal(2);
4144 // Behavior of Msb < Lsb is not well documented.
4145 if (Msb >= Lsb && Bits > Msb)
4146 break;
4147 return false;
4148 }
4149 }
4150 }
4151
4152 return true;
4153}
4154
4155// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4156bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4157 SDValue &Shl2) {
4158 auto *C = dyn_cast<ConstantSDNode>(N);
4159 if (!C)
4160 return false;
4161
4162 int64_t Offset = C->getSExtValue();
4163 for (unsigned Shift = 0; Shift < 4; Shift++) {
4164 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4165 EVT VT = N->getValueType(0);
4166 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4167 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4168 return true;
4169 }
4170 }
4171
4172 return false;
4173}
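// Worked example (illustrative): the constant 96 decomposes as 12 << 3 with
// 12 in simm5 range, so Simm5 = 12 and Shl2 = 3; the constant 97 has no such
// decomposition and is rejected.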
4174
4175// Select VL as a 5 bit immediate or a value that will become a register. This
4176// allows us to choose between VSETIVLI or VSETVLI later.
4177bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4178 auto *C = dyn_cast<ConstantSDNode>(N);
4179 if (C && isUInt<5>(C->getZExtValue())) {
4180 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4181 N->getValueType(0));
4182 } else if (C && C->isAllOnes()) {
4183 // Treat all ones as VLMax.
4184 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4185 N->getValueType(0));
4186 } else if (isa<RegisterSDNode>(N) &&
4187 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4188 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4189 // as the register class. Convert X0 to a special immediate to pass the
4190 // MachineVerifier. This is recognized specially by the vsetvli insertion
4191 // pass.
4192 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4193 N->getValueType(0));
4194 } else {
4195 VL = N;
4196 }
4197
4198 return true;
4199}
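// Worked example (illustrative): VL = 8 is kept as an immediate, VL = -1
// (all ones) and the register X0 are both canonicalized to the VLMaxSentinel
// immediate for the vsetvli insertion pass, and any other value is passed
// through unchanged as a register operand.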
4200
4201static SDValue findVSplat(SDValue N) {
4202 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4203 if (!N.getOperand(0).isUndef())
4204 return SDValue();
4205 N = N.getOperand(1);
4206 }
4207 SDValue Splat = N;
4208 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4209 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4210 !Splat.getOperand(0).isUndef())
4211 return SDValue();
4212 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4213 return Splat;
4214}
4215
4216bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4217 SDValue Splat = findVSplat(N);
4218 if (!Splat)
4219 return false;
4220
4221 SplatVal = Splat.getOperand(1);
4222 return true;
4223}
4224
4225static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4226 SelectionDAG &DAG,
4227 const RISCVSubtarget &Subtarget,
4228 std::function<bool(int64_t)> ValidateImm,
4229 bool Decrement = false) {
4230 SDValue Splat = findVSplat(N);
4231 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4232 return false;
4233
4234 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4235 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4236 "Unexpected splat operand type");
4237
4238 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4239 // type is wider than the resulting vector element type: an implicit
4240 // truncation first takes place. Therefore, perform a manual
4241 // truncation/sign-extension in order to ignore any truncated bits and catch
4242 // any zero-extended immediate.
4243 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4244 // sign-extending to (XLenVT -1).
4245 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4246
4247 int64_t SplatImm = SplatConst.getSExtValue();
4248
4249 if (!ValidateImm(SplatImm))
4250 return false;
4251
4252 if (Decrement)
4253 SplatImm -= 1;
4254
4255 SplatVal =
4256 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4257 return true;
4258}
4259
4260bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4261 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4262 [](int64_t Imm) { return isInt<5>(Imm); });
4263}
4264
4265bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4266 return selectVSplatImmHelper(
4267 N, SplatVal, *CurDAG, *Subtarget,
4268 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4269 /*Decrement=*/true);
4270}
4271
4272bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
4273 return selectVSplatImmHelper(
4274 N, SplatVal, *CurDAG, *Subtarget,
4275 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4276 /*Decrement=*/false);
4277}
4278
4279bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4280 SDValue &SplatVal) {
4281 return selectVSplatImmHelper(
4282 N, SplatVal, *CurDAG, *Subtarget,
4283 [](int64_t Imm) {
4284 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
4285 },
4286 /*Decrement=*/true);
4287}
4288
4289bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4290 SDValue &SplatVal) {
4291 return selectVSplatImmHelper(
4292 N, SplatVal, *CurDAG, *Subtarget,
4293 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4294}
4295
4296bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4297 SDValue Splat = findVSplat(N);
4298 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4299}
4300
4301bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4302 auto IsExtOrTrunc = [](SDValue N) {
4303 switch (N->getOpcode()) {
4304 case ISD::SIGN_EXTEND:
4305 case ISD::ZERO_EXTEND:
4306 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4307 // inactive elements will be undef.
4308 case RISCVISD::TRUNCATE_VECTOR_VL:
4309 case RISCVISD::VSEXT_VL:
4310 case RISCVISD::VZEXT_VL:
4311 return true;
4312 default:
4313 return false;
4314 }
4315 };
4316
4317 // We can have multiple nested nodes, so unravel them all if needed.
4318 while (IsExtOrTrunc(N)) {
4319 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4320 return false;
4321 N = N->getOperand(0);
4322 }
4323
4324 return selectVSplat(N, SplatVal);
4325}
4326
4327bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4328 // Allow bitcasts from XLenVT -> FP.
4329 if (N.getOpcode() == ISD::BITCAST &&
4330 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4331 Imm = N.getOperand(0);
4332 return true;
4333 }
4334 // Allow moves from XLenVT to FP.
4335 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4336 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4337 Imm = N.getOperand(0);
4338 return true;
4339 }
4340
4341 // Otherwise, look for FP constants that can be materialized with scalar int.
4342 auto *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
4343 if (!CFP)
4344 return false;
4345 const APFloat &APF = CFP->getValueAPF();
4346 // td can handle +0.0 already.
4347 if (APF.isPosZero())
4348 return false;
4349
4350 MVT VT = CFP->getSimpleValueType(0);
4351
4352 MVT XLenVT = Subtarget->getXLenVT();
4353 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4354 assert(APF.isNegZero() && "Unexpected constant.");
4355 return false;
4356 }
4357 SDLoc DL(N);
4358 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4359 *Subtarget);
4360 return true;
4361}
4362
4363bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4364 SDValue &Imm) {
4365 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4366 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4367
4368 if (!isInt<5>(ImmVal))
4369 return false;
4370
4371 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4372 Subtarget->getXLenVT());
4373 return true;
4374 }
4375
4376 return false;
4377}
4378
4379// Try to remove sext.w if the input is a W instruction or can be made into
4380// a W instruction cheaply.
4381bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4382 // Look for the sext.w pattern, addiw rd, rs1, 0.
4383 if (N->getMachineOpcode() != RISCV::ADDIW ||
4384 !isNullConstant(N->getOperand(1)))
4385 return false;
4386
4387 SDValue N0 = N->getOperand(0);
4388 if (!N0.isMachineOpcode())
4389 return false;
4390
4391 switch (N0.getMachineOpcode()) {
4392 default:
4393 break;
4394 case RISCV::ADD:
4395 case RISCV::ADDI:
4396 case RISCV::SUB:
4397 case RISCV::MUL:
4398 case RISCV::SLLI: {
4399 // Convert sext.w+add/sub/mul to their W instructions. This will create
4400 // a new independent instruction. This improves latency.
4401 unsigned Opc;
4402 switch (N0.getMachineOpcode()) {
4403 default:
4404 llvm_unreachable("Unexpected opcode!");
4405 case RISCV::ADD: Opc = RISCV::ADDW; break;
4406 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4407 case RISCV::SUB: Opc = RISCV::SUBW; break;
4408 case RISCV::MUL: Opc = RISCV::MULW; break;
4409 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4410 }
4411
4412 SDValue N00 = N0.getOperand(0);
4413 SDValue N01 = N0.getOperand(1);
4414
4415 // Shift amount needs to be uimm5.
4416 if (N0.getMachineOpcode() == RISCV::SLLI &&
4417 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4418 break;
4419
4420 SDNode *Result =
4421 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4422 N00, N01);
4423 ReplaceUses(N, Result);
4424 return true;
4425 }
4426 case RISCV::ADDW:
4427 case RISCV::ADDIW:
4428 case RISCV::SUBW:
4429 case RISCV::MULW:
4430 case RISCV::SLLIW:
4431 case RISCV::PACKW:
4432 case RISCV::TH_MULAW:
4433 case RISCV::TH_MULAH:
4434 case RISCV::TH_MULSW:
4435 case RISCV::TH_MULSH:
4436 if (N0.getValueType() == MVT::i32)
4437 break;
4438
4439 // Result is already sign extended; just remove the sext.w.
4440 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4441 ReplaceUses(N, N0.getNode());
4442 return true;
4443 }
4444
4445 return false;
4446}
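// Worked example (illustrative): for the pair
//   a = ADD x, y ; b = ADDIW a, 0
// the peephole rewrites b to ADDW x, y, and when the input is already a
// W-form instruction such as ADDW or MULW the redundant ADDIW is simply
// replaced by its operand.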
4447
4448static bool usesAllOnesMask(SDValue MaskOp) {
4449 const auto IsVMSet = [](unsigned Opc) {
4450 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4451 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4452 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4453 Opc == RISCV::PseudoVMSET_M_B8;
4454 };
4455
4456 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4457 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4458 // assume that it's all-ones? Same applies to its VL.
4459 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4460}
4461
4462static bool isImplicitDef(SDValue V) {
4463 if (!V.isMachineOpcode())
4464 return false;
4465 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4466 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4467 if (!isImplicitDef(V.getOperand(I)))
4468 return false;
4469 return true;
4470 }
4471 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4472}
4473
4474// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4475// corresponding "unmasked" pseudo versions.
4476bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4477 const RISCV::RISCVMaskedPseudoInfo *I =
4478 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4479 if (!I)
4480 return false;
4481
4482 unsigned MaskOpIdx = I->MaskOpIdx;
4483 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4484 return false;
4485
4486 // There are two classes of pseudos in the table - compares and
4487 // everything else. See the comment on RISCVMaskedPseudo for details.
4488 const unsigned Opc = I->UnmaskedPseudo;
4489 const MCInstrDesc &MCID = TII->get(Opc);
4490 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4491
4492 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4493 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4494
4495 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4497 "Unmasked pseudo has policy but masked pseudo doesn't?");
4498 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4499 "Unexpected pseudo structure");
4500 assert(!(HasPassthru && !MaskedHasPassthru) &&
4501 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4502
4503 SmallVector<SDValue, 8> Ops;
4504 // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have one.
4505 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4506 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4507 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4508 bool HasChainOp =
4509 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4510 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4511 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4512 // Skip the mask
4513 SDValue Op = N->getOperand(I);
4514 if (I == MaskOpIdx)
4515 continue;
4516 if (DropPolicy && I == LastOpNum)
4517 continue;
4518 Ops.push_back(Op);
4519 }
4520
4521 MachineSDNode *Result =
4522 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4523
4524 if (!N->memoperands_empty())
4525 CurDAG->setNodeMemRefs(Result, N->memoperands());
4526
4527 Result->setFlags(N->getFlags());
4528 ReplaceUses(N, Result);
4529
4530 return true;
4531}
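// Worked example (illustrative): a masked pseudo such as
// PseudoVADD_VV_M1_MASK whose mask operand is produced by a PseudoVMSET_M_B*
// is rewritten to the unmasked PseudoVADD_VV_M1, dropping the mask operand
// (and the passthru/policy operands the unmasked form does not take).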
4532
4533/// If our passthru is an implicit_def, use noreg instead. This side
4534/// steps issues with MachineCSE not being able to CSE expressions with
4535/// IMPLICIT_DEF operands while preserving the semantic intent. See
4536/// pr64282 for context. Note that this transform is the last one
4537/// performed at ISEL DAG to DAG.
4538bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4539 bool MadeChange = false;
4540 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4541
4542 while (Position != CurDAG->allnodes_begin()) {
4543 SDNode *N = &*--Position;
4544 if (N->use_empty() || !N->isMachineOpcode())
4545 continue;
4546
4547 const unsigned Opc = N->getMachineOpcode();
4548 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4549 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4550 !isImplicitDef(N->getOperand(0)))
4551 continue;
4552
4553 SmallVector<SDValue> Ops;
4554 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4555 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4556 SDValue Op = N->getOperand(I);
4557 Ops.push_back(Op);
4558 }
4559
4560 MachineSDNode *Result =
4561 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4562 Result->setFlags(N->getFlags());
4563 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4564 ReplaceUses(N, Result);
4565 MadeChange = true;
4566 }
4567 return MadeChange;
4568}
4569
4570
4571// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4572// for instruction scheduling.
4573FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4574 CodeGenOptLevel OptLevel) {
4575 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4576}
4577
4579
4584
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this a load/store that we have a RegRegScale instruction for.
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
#define INST_ALL_NF_CASE(NAME)
static bool isApplicableToPLI(int Val)
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
bool isPosZero() const
Definition APFloat.h:1442
bool isNegZero() const
Definition APFloat.h:1443
Class for arbitrary precision integers.
Definition APInt.h:78
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:470
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
iterator_range< user_iterator > users()
Definition Value.h:426
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
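A sketch of how these TSFlags queries are typically used, assuming TII and an RVV pseudo opcode Opc are in scope; the opcode itself is hypothetical.
const MCInstrDesc &Desc = TII->get(Opc);
bool NeedsVL   = RISCVII::hasVLOp(Desc.TSFlags);        // pseudo carries a VL operand
bool NeedsSEW  = RISCVII::hasSEWOp(Desc.TSFlags);       // pseudo carries a log2(SEW) operand
bool HasPolicy = RISCVII::hasVecPolicyOp(Desc.TSFlags); // tail/mask policy operand present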
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
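An illustrative query of the constant-materialization helpers, assuming a RISCVSubtarget (which is an MCSubtargetInfo) is reachable as *Subtarget; the constant is arbitrary.
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(0x12345678, *Subtarget);
unsigned NumInsts = Seq.size(); // number of scalar instructions needed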
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
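For reference, the 3-bit VSEW field of vtype encodes SEW = 1 << (VSEW + 3), which is what decodeVSEW computes; the field value below is illustrative.
unsigned SEW = RISCVVType::decodeVSEW(2); // 1 << (2 + 3) == 32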
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
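A self-contained illustration of the range-based wrapper together with SmallVector::push_back (values are arbitrary):
SmallVector<int, 4> Vals = {1, 2, 3};
Vals.push_back(4);
bool AllPositive = all_of(Vals, [](int V) { return V > 0; }); // true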
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
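Illustrative values showing how these predicates decompose a contiguous bit mask (not taken from this file):
uint64_t C = 0xFF0;                  // ones in bits [4, 11]
bool Run = isShiftedMask_64(C);      // true: one contiguous run of ones
unsigned Lo = countr_zero(C);        // 4: zeros below the run
unsigned Len = countr_one(C >> Lo);  // 8: length of the run
unsigned Msb = Log2_64(C);           // 11: index of the highest set bit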
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
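A sketch of the usual checked-cast pattern in isel code, assuming an SDValue Val is in scope:
if (auto *CN = dyn_cast<ConstantSDNode>(Val)) {
  // Val is a constant; read its value.
  int64_t Imm = CN->getSExtValue();
  (void)Imm;
}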
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
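Illustrative values: the alignment that still holds after stepping a base pointer by a byte offset.
Align A = commonAlignment(Align(8), /*Offset=*/4); // Align(4)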
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
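Illustrative values only:
uint64_t LoMask = maskTrailingOnes<uint64_t>(12);  // 0xFFF
uint64_t HiMask = maskTrailingZeros<uint64_t>(12); // ~0xFFFULL
int64_t Simm12 = SignExtend64<12>(0xFFF);          // -1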
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
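A small illustration using standard MVTs (the specific types are arbitrary):
EVT VT = MVT::nxv4i64;                            // scalable vector of 4 x i64
TypeSize Sz = VT.getStoreSize();                  // 32 bytes, scalable
bool ScalarInt = EVT(MVT::i64).isScalarInteger(); // true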
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
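A sketch of building pointer info for a stack slot, assuming a MachineFunction MF and a frame index FI are in scope; the 8-byte offset is illustrative.
MachinePointerInfo SlotInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachinePointerInfo HighHalf = SlotInfo.getWithOffset(8);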
This represents a list of ValueType's that has been intern'd by a SelectionDAG.