LLVM 23.0.0git
RISCVInstrInfo.cpp
Go to the documentation of this file.
1//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the RISC-V implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVInstrInfo.h"
16#include "RISCV.h"
18#include "RISCVSubtarget.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
33#include "llvm/IR/Module.h"
34#include "llvm/MC/MCDwarf.h"
38
39using namespace llvm;
40
41#define GEN_CHECK_COMPRESS_INSTR
42#include "RISCVGenCompressInstEmitter.inc"
43
44#define GET_INSTRINFO_CTOR_DTOR
45#include "RISCVGenInstrInfo.inc"
46
47#define DEBUG_TYPE "riscv-instr-info"
// Pass statistics: count the individual vector registers spilled/reloaded as
// part of vector register groups (incremented in storeRegToStackSlot /
// loadRegFromStackSlot below).
48STATISTIC(NumVRegSpilled,
49 "Number of registers within vector register groups spilled");
50STATISTIC(NumVRegReloaded,
51 "Number of registers within vector register groups reloaded");
52
// NOTE(review): the cl::opt declaration lines (doxygen lines 53, 57, 61-62,
// 64) are missing from this extraction; what follows are the argument tails
// of two command-line options, "riscv-prefer-whole-register-move" and
// "riscv-force-machine-combiner-strategy" — confirm against upstream.
54 "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
55 cl::desc("Prefer whole register move for vector registers."));
56
58 "riscv-force-machine-combiner-strategy", cl::Hidden,
59 cl::desc("Force machine combiner to use a specific strategy for machine "
60 "trace metrics evaluation."),
63 "Local strategy."),
65 "MinInstrCount strategy.")));
66
// Instantiate the TableGen-generated searchable tables for RVV pseudos.
// NOTE(review): the opening line (doxygen line 67, presumably
// `namespace llvm::RISCVVPseudosTable {`) is missing from this extraction;
// the closing brace comment below confirms the namespace name.
68
69using namespace RISCV;
70
71#define GET_RISCVVPseudosTable_IMPL
72#include "RISCVGenSearchableTables.inc"
73
74} // namespace llvm::RISCVVPseudosTable
75
76namespace llvm::RISCV {
77
// Table mapping masked RVV pseudos to their unmasked counterparts.
78#define GET_RISCVMaskedPseudosTable_IMPL
79#include "RISCVGenSearchableTables.inc"
80
81} // end namespace llvm::RISCV
82
// RISCVInstrInfo constructor: passes the call-frame setup/destroy pseudo
// opcodes to the TableGen'd base class and initializes the hw-mode-specific
// register info from the subtarget.
// NOTE(review): the constructor's declaration line (doxygen line 83) is
// missing from this extraction; this is its member-initializer list.
84 : RISCVGenInstrInfo(STI, RegInfo, RISCV::ADJCALLSTACKDOWN,
85 RISCV::ADJCALLSTACKUP),
86 RegInfo(STI.getHwMode()), STI(STI) {}
87
88#define GET_INSTRINFO_HELPERS
89#include "RISCVGenInstrInfo.inc"
90
// Returns the canonical NOP for this subtarget: c.nop when the compressed
// (Zca) extension is available, otherwise the 4-byte `addi x0, x0, 0`.
// NOTE(review): the function signature line (doxygen line 91) is missing
// from this extraction — presumably MCInst RISCVInstrInfo::getNop() const.
92 if (STI.hasStdExtZca())
93 return MCInstBuilder(RISCV::C_NOP);
94 return MCInstBuilder(RISCV::ADDI)
95 .addReg(RISCV::X0)
96 .addReg(RISCV::X0)
97 .addImm(0);
98}
99
// Thin wrapper over the three-argument overload below; discards the
// memory-size result.
// NOTE(review): the signature start (doxygen line 100) is missing from this
// extraction — presumably Register RISCVInstrInfo::isLoadFromStackSlot(
//     const MachineInstr &MI, ...
101 int &FrameIndex) const {
102 TypeSize Dummy = TypeSize::getZero();
103 return isLoadFromStackSlot(MI, FrameIndex, Dummy);
104}
105
106static std::optional<unsigned> getLMULForRVVWholeLoadStore(unsigned Opcode) {
107 switch (Opcode) {
108 default:
109 return std::nullopt;
110 case RISCV::VS1R_V:
111 case RISCV::VL1RE8_V:
112 case RISCV::VL1RE16_V:
113 case RISCV::VL1RE32_V:
114 case RISCV::VL1RE64_V:
115 return 1;
116 case RISCV::VS2R_V:
117 case RISCV::VL2RE8_V:
118 case RISCV::VL2RE16_V:
119 case RISCV::VL2RE32_V:
120 case RISCV::VL2RE64_V:
121 return 2;
122 case RISCV::VS4R_V:
123 case RISCV::VL4RE8_V:
124 case RISCV::VL4RE16_V:
125 case RISCV::VL4RE32_V:
126 case RISCV::VL4RE64_V:
127 return 4;
128 case RISCV::VS8R_V:
129 case RISCV::VL8RE8_V:
130 case RISCV::VL8RE16_V:
131 case RISCV::VL8RE32_V:
132 case RISCV::VL8RE64_V:
133 return 8;
134 }
135}
136
// Identify loads that read directly from a stack slot. On a match, returns
// the destination register and sets FrameIndex and MemBytes; returns 0
// otherwise. Scalar loads additionally require a literal zero offset; whole
// vector register loads (VL<N>RE8_V) carry no immediate-offset operand.
// NOTE(review): doxygen lines 137 (signature start) and 172 (presumably the
// scalable MemBytes assignment derived from LMUL) are missing from this
// extraction — confirm against upstream.
138 int &FrameIndex,
139 TypeSize &MemBytes) const {
140 switch (MI.getOpcode()) {
141 default:
142 return 0;
143 case RISCV::LB:
144 case RISCV::LBU:
145 MemBytes = TypeSize::getFixed(1);
146 break;
147 case RISCV::LH:
148 case RISCV::LH_INX:
149 case RISCV::LHU:
150 case RISCV::FLH:
151 MemBytes = TypeSize::getFixed(2);
152 break;
153 case RISCV::LW:
154 case RISCV::LW_INX:
155 case RISCV::FLW:
156 case RISCV::LWU:
157 MemBytes = TypeSize::getFixed(4);
158 break;
159 case RISCV::LD:
160 case RISCV::LD_RV32:
161 case RISCV::FLD:
162 MemBytes = TypeSize::getFixed(8);
163 break;
164 case RISCV::VL1RE8_V:
165 case RISCV::VL2RE8_V:
166 case RISCV::VL4RE8_V:
167 case RISCV::VL8RE8_V:
168 if (!MI.getOperand(1).isFI())
169 return Register();
170 FrameIndex = MI.getOperand(1).getIndex();
171 unsigned LMUL = *getLMULForRVVWholeLoadStore(MI.getOpcode());
173 return MI.getOperand(0).getReg();
174 }
175
176 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
177 MI.getOperand(2).getImm() == 0) {
178 FrameIndex = MI.getOperand(1).getIndex();
179 return MI.getOperand(0).getReg();
180 }
181
182 return 0;
183}
184
// Thin wrapper over the three-argument overload below; discards the
// memory-size result.
// NOTE(review): the signature start (doxygen line 185) is missing from this
// extraction — presumably Register RISCVInstrInfo::isStoreToStackSlot(
//     const MachineInstr &MI, ...
186 int &FrameIndex) const {
187 TypeSize Dummy = TypeSize::getZero();
188 return isStoreToStackSlot(MI, FrameIndex, Dummy);
189}
190
// Identify stores that write directly to a stack slot. On a match, returns
// the stored register and sets FrameIndex and MemBytes; returns 0 otherwise.
// Mirrors isLoadFromStackSlot above: scalar stores require a literal zero
// offset; whole vector register stores (VS<N>R_V) have no offset operand.
// NOTE(review): doxygen lines 191 (signature start) and 223 (presumably the
// scalable MemBytes assignment derived from LMUL) are missing from this
// extraction — confirm against upstream.
192 int &FrameIndex,
193 TypeSize &MemBytes) const {
194 switch (MI.getOpcode()) {
195 default:
196 return 0;
197 case RISCV::SB:
198 MemBytes = TypeSize::getFixed(1);
199 break;
200 case RISCV::SH:
201 case RISCV::SH_INX:
202 case RISCV::FSH:
203 MemBytes = TypeSize::getFixed(2);
204 break;
205 case RISCV::SW:
206 case RISCV::SW_INX:
207 case RISCV::FSW:
208 MemBytes = TypeSize::getFixed(4);
209 break;
210 case RISCV::SD:
211 case RISCV::SD_RV32:
212 case RISCV::FSD:
213 MemBytes = TypeSize::getFixed(8);
214 break;
215 case RISCV::VS1R_V:
216 case RISCV::VS2R_V:
217 case RISCV::VS4R_V:
218 case RISCV::VS8R_V:
219 if (!MI.getOperand(1).isFI())
220 return Register();
221 FrameIndex = MI.getOperand(1).getIndex();
222 unsigned LMUL = *getLMULForRVVWholeLoadStore(MI.getOpcode());
224 return MI.getOperand(0).getReg();
225 }
226
227 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
228 MI.getOperand(2).getImm() == 0) {
229 FrameIndex = MI.getOperand(1).getIndex();
230 return MI.getOperand(0).getReg();
231 }
232
233 return 0;
234}
235
// For a handful of RVV splat/scalar-insert/vid pseudos, the instruction is a
// candidate only when its passthru operand (operand 1) is undef, i.e. it does
// not merge into prior register contents.
// NOTE(review): the signature start (doxygen line 236) and the default-case
// return statement (line 247) are missing from this extraction; judging by
// the opcode list this appears to be
// RISCVInstrInfo::isReallyTriviallyReMaterializable — confirm upstream.
237 const MachineInstr &MI) const {
238 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
239 case RISCV::VMV_V_X:
240 case RISCV::VFMV_V_F:
241 case RISCV::VMV_V_I:
242 case RISCV::VMV_S_X:
243 case RISCV::VFMV_S_F:
244 case RISCV::VID_V:
245 return MI.getOperand(1).isUndef();
246 default:
248 }
249}
250
/// Decide whether copying a tuple of NumRegs consecutive registers from
/// SrcReg to DstReg in forward (low-to-high) order would overwrite part of
/// the source before it has been read. That happens exactly when the
/// destination range begins strictly inside the source range.
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  if (DstReg <= SrcReg)
    return false;
  return DstReg - SrcReg < NumRegs;
}
255
// Walks backwards from a whole-register COPY to find its producing vector
// instruction and decides whether the COPY can be lowered to vmv.v.v (or
// vmv.v.i), i.e. a VL/SEW-sensitive move, instead of a whole register move.
// Bails out conservatively on inline asm, calls, VL-modifying instructions,
// LMUL/SEW mismatches, and tail-undisturbed vtype settings. On success,
// DefMBBI is set to the producing instruction.
// NOTE(review): several lines are missing from this extraction — doxygen
// lines 256/258-259 (signature start incl. the MBBI/DefMBBI iterator
// parameters), 261 (presumably the PreferWholeRegisterMove early-out), 267
// (presumably the TargetRegisterInfo *TRI declaration used below), and 362
// (presumably the vector-widening-instruction check) — confirm upstream.
257 const MachineBasicBlock &MBB,
260 RISCVVType::VLMUL LMul) {
262 return false;
263
264 assert(MBBI->getOpcode() == TargetOpcode::COPY &&
265 "Unexpected COPY instruction.");
266 Register SrcReg = MBBI->getOperand(1).getReg();
268
269 bool FoundDef = false;
270 bool FirstVSetVLI = false;
271 unsigned FirstSEW = 0;
272 while (MBBI != MBB.begin()) {
273 --MBBI;
274 if (MBBI->isMetaInstruction())
275 continue;
276
277 if (RISCVInstrInfo::isVectorConfigInstr(*MBBI)) {
278 // There is a vsetvli between COPY and source define instruction.
279 // vy = def_vop ... (producing instruction)
280 // ...
281 // vsetvli
282 // ...
283 // vx = COPY vy
284 if (!FoundDef) {
285 if (!FirstVSetVLI) {
286 FirstVSetVLI = true;
287 unsigned FirstVType = MBBI->getOperand(2).getImm();
288 RISCVVType::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
289 FirstSEW = RISCVVType::getSEW(FirstVType);
290 // The first encountered vsetvli must have the same lmul as the
291 // register class of COPY.
292 if (FirstLMul != LMul)
293 return false;
294 }
295 // Only permit `vsetvli x0, x0, vtype` between COPY and the source
296 // define instruction.
297 if (!RISCVInstrInfo::isVLPreservingConfig(*MBBI))
298 return false;
299 continue;
300 }
301
302 // MBBI is the first vsetvli before the producing instruction.
303 unsigned VType = MBBI->getOperand(2).getImm();
304 // If there is a vsetvli between COPY and the producing instruction.
305 if (FirstVSetVLI) {
306 // If SEW is different, return false.
307 if (RISCVVType::getSEW(VType) != FirstSEW)
308 return false;
309 }
310
311 // If the vsetvli is tail undisturbed, keep the whole register move.
312 if (!RISCVVType::isTailAgnostic(VType))
313 return false;
314
315 // The checking is conservative. We only have register classes for
316 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
317 // for fractional LMUL operations. However, we could not use the vsetvli
318 // lmul for widening operations. The result of widening operation is
319 // 2 x LMUL.
320 return LMul == RISCVVType::getVLMUL(VType);
321 } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
322 return false;
323 } else if (MBBI->getNumDefs()) {
324 // Check all the instructions which will change VL.
325 // For example, vleff has implicit def VL.
326 if (MBBI->modifiesRegister(RISCV::VL, /*TRI=*/nullptr))
327 return false;
328
329 // Only converting whole register copies to vmv.v.v when the defining
330 // value appears in the explicit operands.
331 for (const MachineOperand &MO : MBBI->explicit_operands()) {
332 if (!MO.isReg() || !MO.isDef())
333 continue;
334 if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
335 // We only permit the source of COPY has the same LMUL as the defined
336 // operand.
337 // There are cases we need to keep the whole register copy if the LMUL
338 // is different.
339 // For example,
340 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
341 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
342 // # The COPY may be created by vlmul_trunc intrinsic.
343 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
344 //
345 // After widening, the valid value will be 4 x e32 elements. If we
346 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
347 // FIXME: The COPY of subregister of Zvlsseg register will not be able
348 // to convert to vmv.v.[v|i] under the constraint.
349 if (MO.getReg() != SrcReg)
350 return false;
351
352 // In widening reduction instructions with LMUL_1 input vector case,
353 // only checking the LMUL is insufficient due to reduction result is
354 // always LMUL_1.
355 // For example,
356 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
357 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
358 // $v26 = COPY killed renamable $v8
359 // After widening, The valid value will be 1 x e16 elements. If we
360 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
361 uint64_t TSFlags = MBBI->getDesc().TSFlags;
363 return false;
364
365 // If the producing instruction does not depend on vsetvli, do not
366 // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
367 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
368 return false;
369
370 // Found the definition.
371 FoundDef = true;
372 DefMBBI = MBBI;
373 break;
374 }
375 }
376 }
377 }
378
379 return false;
380}
381
// Emit a copy between (possibly segmented/tuple) vector registers. Chooses
// the largest aligned whole-register move (vmv8r/4r/2r/1r.v) per step, copies
// in reverse when the ranges overlap forward, and downgrades to vmv.v.v /
// vmv.v.i when isConvertibleToVMV_V_V proves it safe.
// NOTE(review): missing from this extraction — doxygen lines 382-383
// (signature start), 387 (presumably the LMul extraction from
// RegClass->TSFlags; LMul is used below), and 458 (presumably the DefMBBI
// iterator declaration consumed by isConvertibleToVMV_V_V) — confirm
// against upstream.
384 const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
385 const TargetRegisterClass *RegClass) const {
386 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
388 unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
389
390 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
391 uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
392 auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
393 assert(!Fractional && "It is impossible be fractional lmul here.");
394 unsigned NumRegs = NF * LMulVal;
395 bool ReversedCopy =
396 forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
397 if (ReversedCopy) {
398 // If the src and dest overlap when copying a tuple, we need to copy the
399 // registers in reverse.
400 SrcEncoding += NumRegs - 1;
401 DstEncoding += NumRegs - 1;
402 }
403
404 unsigned I = 0;
405 auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
406 -> std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned,
407 unsigned, unsigned> {
408 if (ReversedCopy) {
409 // For reversed copying, if there are enough aligned registers(8/4/2), we
410 // can do a larger copy(LMUL8/4/2).
411 // Besides, we have already known that DstEncoding is larger than
412 // SrcEncoding in forwardCopyWillClobberTuple, so the difference between
413 // DstEncoding and SrcEncoding should be >= LMUL value we try to use to
414 // avoid clobbering.
415 uint16_t Diff = DstEncoding - SrcEncoding;
416 if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
417 DstEncoding % 8 == 7)
418 return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
419 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
420 if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
421 DstEncoding % 4 == 3)
422 return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
423 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
424 if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
425 DstEncoding % 2 == 1)
426 return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
427 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
428 // Or we should do LMUL1 copying.
429 return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
430 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
431 }
432
433 // For forward copying, if source register encoding and destination register
434 // encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.
435 if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
436 return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
437 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
438 if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
439 return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
440 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
441 if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
442 return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
443 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
444 // Or we should do LMUL1 copying.
445 return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
446 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
447 };
448
449 while (I != NumRegs) {
450 // For non-segment copying, we only do this once as the registers are always
451 // aligned.
452 // For segment copying, we may do this several times. If the registers are
453 // aligned to larger LMUL, we can eliminate some copyings.
454 auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
455 GetCopyInfo(SrcEncoding, DstEncoding);
456 auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);
457
459 if (LMul == LMulCopied &&
460 isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
461 Opc = VVOpc;
462 if (DefMBBI->getOpcode() == VIOpc)
463 Opc = VIOpc;
464 }
465
466 // Emit actual copying.
467 // For reversed copying, the encoding should be decreased.
468 MCRegister ActualSrcReg = TRI->findVRegWithEncoding(
469 RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
470 MCRegister ActualDstReg = TRI->findVRegWithEncoding(
471 RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
472
473 auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
474 bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
475 bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
476 if (UseVMV)
477 MIB.addReg(ActualDstReg, RegState::Undef);
478 if (UseVMV_V_I)
479 MIB = MIB.add(DefMBBI->getOperand(2));
480 else
481 MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
482 if (UseVMV) {
483 const MCInstrDesc &Desc = DefMBBI->getDesc();
484 MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
485 unsigned Log2SEW =
486 DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
487 MIB.addImm(Log2SEW ? Log2SEW : 3); // SEW
488 MIB.addImm(0); // tu, mu
489 MIB.addReg(RISCV::VL, RegState::Implicit);
490 MIB.addReg(RISCV::VTYPE, RegState::Implicit);
491 }
492 // Add an implicit read of the original source to silence the verifier
493 // in the cases where some of the smaller VRs we're copying from might be
494 // undef, caused by the fact that the original, larger source VR might not
495 // be fully initialized at the time this COPY happens.
496 MIB.addReg(SrcReg, RegState::Implicit);
497
498 // If we are copying reversely, we should decrease the encoding.
499 SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
500 DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
501 I += NumCopied;
502 }
503}
504
// Lower a generic COPY between physical registers, dispatching on register
// class: GPR via addi, GPR16/32-inX via pseudo moves, GPR pairs (Zdinx fmv /
// P-ext padd.dw / two addis), VCSR reads via csrrs, FP classes via fsgnj,
// GPR<->FPR via fmv.*, and RVV classes via copyPhysRegVector.
// NOTE(review): the signature start (doxygen lines 505-506, presumably
// void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
//     MachineBasicBlock::iterator MBBI, ...) is missing from this extraction.
507 const DebugLoc &DL, Register DstReg,
508 Register SrcReg, bool KillSrc,
509 bool RenamableDest, bool RenamableSrc) const {
510 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
511 RegState KillFlag = getKillRegState(KillSrc);
512
513 if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
514 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
515 .addReg(SrcReg, KillFlag | getRenamableRegState(RenamableSrc))
516 .addImm(0);
517 return;
518 }
519
520 if (RISCV::GPRF16RegClass.contains(DstReg, SrcReg)) {
521 BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR16INX), DstReg)
522 .addReg(SrcReg, KillFlag | getRenamableRegState(RenamableSrc));
523 return;
524 }
525
526 if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
527 BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR32INX), DstReg)
528 .addReg(SrcReg, KillFlag | getRenamableRegState(RenamableSrc));
529 return;
530 }
531
532 if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
533 if (STI.isRV32()) {
534 if (STI.hasStdExtZdinx()) {
535 // On RV32_Zdinx, FMV.D will move a pair of registers to another pair of
536 // registers, in one instruction.
537 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D_IN32X), DstReg)
538 .addReg(SrcReg, getRenamableRegState(RenamableSrc))
539 .addReg(SrcReg, KillFlag | getRenamableRegState(RenamableSrc));
540 return;
541 }
542
543 if (STI.hasStdExtP()) {
544 // On RV32P, `padd.dw` is a GPR Pair Add
545 BuildMI(MBB, MBBI, DL, get(RISCV::PADD_DW), DstReg)
546 .addReg(SrcReg, KillFlag | getRenamableRegState(RenamableSrc))
547 .addReg(RISCV::X0_Pair);
548 return;
549 }
550 }
551
552 MCRegister EvenReg = TRI->getSubReg(SrcReg, RISCV::sub_gpr_even);
553 MCRegister OddReg = TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd);
554 // We need to correct the odd register of X0_Pair.
555 if (OddReg == RISCV::DUMMY_REG_PAIR_WITH_X0)
556 OddReg = RISCV::X0;
557 assert(DstReg != RISCV::X0_Pair && "Cannot write to X0_Pair");
558
559 // Emit an ADDI for both parts of GPRPair.
560 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
561 TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
562 .addReg(EvenReg, KillFlag)
563 .addImm(0);
564 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
565 TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
566 .addReg(OddReg, KillFlag)
567 .addImm(0);
568 return;
569 }
570
571 // Handle copy from csr
572 if (RISCV::VCSRRegClass.contains(SrcReg) &&
573 RISCV::GPRRegClass.contains(DstReg)) {
574 BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
575 .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
576 .addReg(RISCV::X0);
577 return;
578 }
579
580 if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
581 unsigned Opc;
582 if (STI.hasStdExtZfh()) {
583 Opc = RISCV::FSGNJ_H;
584 } else {
585 assert(STI.hasStdExtF() &&
586 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
587 "Unexpected extensions");
588 // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
589 DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
590 &RISCV::FPR32RegClass);
591 SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
592 &RISCV::FPR32RegClass);
593 Opc = RISCV::FSGNJ_S;
594 }
595 BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
596 .addReg(SrcReg, KillFlag)
597 .addReg(SrcReg, KillFlag);
598 return;
599 }
600
601 if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
602 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
603 .addReg(SrcReg, KillFlag)
604 .addReg(SrcReg, KillFlag);
605 return;
606 }
607
608 if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
609 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
610 .addReg(SrcReg, KillFlag)
611 .addReg(SrcReg, KillFlag);
612 return;
613 }
614
615 if (RISCV::FPR32RegClass.contains(DstReg) &&
616 RISCV::GPRRegClass.contains(SrcReg)) {
617 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
618 .addReg(SrcReg, KillFlag);
619 return;
620 }
621
622 if (RISCV::GPRRegClass.contains(DstReg) &&
623 RISCV::FPR32RegClass.contains(SrcReg)) {
624 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
625 .addReg(SrcReg, KillFlag);
626 return;
627 }
628
629 if (RISCV::FPR64RegClass.contains(DstReg) &&
630 RISCV::GPRRegClass.contains(SrcReg)) {
631 assert(STI.getXLen() == 64 && "Unexpected GPR size");
632 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
633 .addReg(SrcReg, KillFlag);
634 return;
635 }
636
637 if (RISCV::GPRRegClass.contains(DstReg) &&
638 RISCV::FPR64RegClass.contains(SrcReg)) {
639 assert(STI.getXLen() == 64 && "Unexpected GPR size");
640 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
641 .addReg(SrcReg, KillFlag);
642 return;
643 }
644
645 // VR->VR copies.
646 const TargetRegisterClass *RegClass =
647 TRI->getCommonMinimalPhysRegClass(SrcReg, DstReg);
648 if (RISCVRegisterInfo::isRVVRegClass(RegClass)) {
649 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
650 return;
651 }
652
653 llvm_unreachable("Impossible reg-to-reg copy");
654}
655
// Spill SrcReg to stack slot FI: select the store opcode from the register
// class (scalar stores, Zilsd SD_RV32 / Zdinx pseudo for pairs, whole
// register vector stores, or segment-spill pseudos), then emit the store.
// Vector spills use a scalable-size memory operand and omit the immediate
// offset; scalar spills carry a zero offset.
// NOTE(review): missing from this extraction — doxygen lines 656-657
// (signature start), 720-722 and 733-734 (the two MachineMemOperand *MMO
// creations whose argument tails remain below), and 725 (presumably the
// `if (RISCVRegisterInfo::isRVVRegClass(RC))` guard that the trailing
// `} else {` pairs with) — confirm against upstream.
658 Register SrcReg, bool IsKill, int FI,
659 const TargetRegisterClass *RC,
660 Register VReg,
661 MachineInstr::MIFlag Flags) const {
662 MachineFunction *MF = MBB.getParent();
663 MachineFrameInfo &MFI = MF->getFrameInfo();
664 Align Alignment = MFI.getObjectAlign(FI);
665
666 unsigned Opcode;
667 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
668 Opcode = RegInfo.getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW
669 : RISCV::SD;
670 } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
671 Opcode = RISCV::SH_INX;
672 } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
673 Opcode = RISCV::SW_INX;
674 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
675 if (!STI.is64Bit() && STI.hasStdExtZilsd() &&
676 Alignment >= STI.getZilsdAlign()) {
677 Opcode = RISCV::SD_RV32;
678 } else {
679 Opcode = RISCV::PseudoRV32ZdinxSD;
680 }
681 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
682 Opcode = RISCV::FSH;
683 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
684 Opcode = RISCV::FSW;
685 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
686 Opcode = RISCV::FSD;
687 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
688 Opcode = RISCV::VS1R_V;
689 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
690 Opcode = RISCV::VS2R_V;
691 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
692 Opcode = RISCV::VS4R_V;
693 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
694 Opcode = RISCV::VS8R_V;
695 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
696 Opcode = RISCV::PseudoVSPILL2_M1;
697 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
698 Opcode = RISCV::PseudoVSPILL2_M2;
699 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
700 Opcode = RISCV::PseudoVSPILL2_M4;
701 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
702 Opcode = RISCV::PseudoVSPILL3_M1;
703 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
704 Opcode = RISCV::PseudoVSPILL3_M2;
705 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
706 Opcode = RISCV::PseudoVSPILL4_M1;
707 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
708 Opcode = RISCV::PseudoVSPILL4_M2;
709 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
710 Opcode = RISCV::PseudoVSPILL5_M1;
711 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
712 Opcode = RISCV::PseudoVSPILL6_M1;
713 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
714 Opcode = RISCV::PseudoVSPILL7_M1;
715 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
716 Opcode = RISCV::PseudoVSPILL8_M1;
717 else
718 llvm_unreachable("Can't store this register to stack slot");
719
723 TypeSize::getScalable(MFI.getObjectSize(FI)), Alignment);
724
726 BuildMI(MBB, I, DebugLoc(), get(Opcode))
727 .addReg(SrcReg, getKillRegState(IsKill))
728 .addFrameIndex(FI)
729 .addMemOperand(MMO)
730 .setMIFlag(Flags);
731 NumVRegSpilled += RegInfo.getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock;
732 } else {
735 MFI.getObjectSize(FI), Alignment);
736
737 BuildMI(MBB, I, DebugLoc(), get(Opcode))
738 .addReg(SrcReg, getKillRegState(IsKill))
739 .addFrameIndex(FI)
740 .addMemOperand(MMO)
741 .setMIFlag(Flags);
742 }
743}
745
// Reload DstReg from stack slot FI: mirror of storeRegToStackSlot — select
// the load opcode from the register class (scalar loads, Zilsd LD_RV32 /
// Zdinx pseudo for pairs, whole register vector loads, or segment-reload
// pseudos), then emit the load. Keeps the debug location only for
// FrameDestroy (epilogue) reloads.
// NOTE(review): missing from this extraction — doxygen lines 746-747
// (signature start), 812-814 and 824-825 (the two MachineMemOperand *MMO
// creations whose argument tails remain below), and 817 (presumably the
// `if (RISCVRegisterInfo::isRVVRegClass(RC))` guard that the trailing
// `} else {` pairs with) — confirm against upstream.
748 Register DstReg, int FI,
749 const TargetRegisterClass *RC,
750 Register VReg, unsigned SubReg,
751 MachineInstr::MIFlag Flags) const {
752 MachineFunction *MF = MBB.getParent();
753 MachineFrameInfo &MFI = MF->getFrameInfo();
754 Align Alignment = MFI.getObjectAlign(FI);
755 DebugLoc DL =
756 Flags & MachineInstr::FrameDestroy ? MBB.findDebugLoc(I) : DebugLoc();
757
758 unsigned Opcode;
759 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
760 Opcode = RegInfo.getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW
761 : RISCV::LD;
762 } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
763 Opcode = RISCV::LH_INX;
764 } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
765 Opcode = RISCV::LW_INX;
766 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
767 if (!STI.is64Bit() && STI.hasStdExtZilsd() &&
768 Alignment >= STI.getZilsdAlign()) {
769 Opcode = RISCV::LD_RV32;
770 } else {
771 Opcode = RISCV::PseudoRV32ZdinxLD;
772 }
773 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
774 Opcode = RISCV::FLH;
775 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
776 Opcode = RISCV::FLW;
777 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
778 Opcode = RISCV::FLD;
779 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
780 Opcode = RISCV::VL1RE8_V;
781 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
782 Opcode = RISCV::VL2RE8_V;
783 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
784 Opcode = RISCV::VL4RE8_V;
785 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
786 Opcode = RISCV::VL8RE8_V;
787 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
788 Opcode = RISCV::PseudoVRELOAD2_M1;
789 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
790 Opcode = RISCV::PseudoVRELOAD2_M2;
791 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
792 Opcode = RISCV::PseudoVRELOAD2_M4;
793 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
794 Opcode = RISCV::PseudoVRELOAD3_M1;
795 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
796 Opcode = RISCV::PseudoVRELOAD3_M2;
797 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
798 Opcode = RISCV::PseudoVRELOAD4_M1;
799 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
800 Opcode = RISCV::PseudoVRELOAD4_M2;
801 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
802 Opcode = RISCV::PseudoVRELOAD5_M1;
803 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
804 Opcode = RISCV::PseudoVRELOAD6_M1;
805 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
806 Opcode = RISCV::PseudoVRELOAD7_M1;
807 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
808 Opcode = RISCV::PseudoVRELOAD8_M1;
809 else
810 llvm_unreachable("Can't load this register from stack slot");
811
815 TypeSize::getScalable(MFI.getObjectSize(FI)), Alignment);
816
818 BuildMI(MBB, I, DL, get(Opcode), DstReg)
819 .addFrameIndex(FI)
820 .addMemOperand(MMO)
821 .setMIFlag(Flags);
822 NumVRegReloaded += RegInfo.getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock;
823 } else {
826 MFI.getObjectSize(FI), Alignment);
827
828 BuildMI(MBB, I, DL, get(Opcode), DstReg)
829 .addFrameIndex(FI)
830 .addImm(0)
831 .addMemOperand(MMO)
832 .setMIFlag(Flags);
833 }
834}
// Map an extend-like or vector-extract instruction whose only use operand
// (operand index 1) is being folded with a stack reload to the load opcode
// that performs the combined operation directly: sext/zext variants become
// the correspondingly-sized (un)signed scalar loads; vmv.x.s / vfmv.f.s
// become a scalar load of SEW bytes. Returns std::nullopt when no fold
// applies. Little-endian only, since the folds narrow the load in place.
// NOTE(review): doxygen line 836 (presumably the `ArrayRef<unsigned> Ops`
// parameter used below) is missing from this extraction; note the "835"
// fused into the first line is extraction residue, not code.
835std::optional<unsigned> getFoldedOpcode(MachineFunction &MF, MachineInstr &MI,
837 const RISCVSubtarget &ST) {
838
839 // The below optimizations narrow the load so they are only valid for little
840 // endian.
841 // TODO: Support big endian by adding an offset into the frame object?
842 if (MF.getDataLayout().isBigEndian())
843 return std::nullopt;
844
845 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
846 if (Ops.size() != 1 || Ops[0] != 1)
847 return std::nullopt;
848
849 switch (MI.getOpcode()) {
850 default:
851 if (RISCVInstrInfo::isSEXT_W(MI))
852 return RISCV::LW;
853 if (RISCVInstrInfo::isZEXT_W(MI))
854 return RISCV::LWU;
855 if (RISCVInstrInfo::isZEXT_B(MI))
856 return RISCV::LBU;
857 break;
858 case RISCV::SEXT_H:
859 return RISCV::LH;
860 case RISCV::SEXT_B:
861 return RISCV::LB;
862 case RISCV::ZEXT_H_RV32:
863 case RISCV::ZEXT_H_RV64:
864 return RISCV::LHU;
865 }
866
867 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
868 default:
869 return std::nullopt;
870 case RISCV::VMV_X_S: {
871 unsigned Log2SEW =
872 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
873 if (ST.getXLen() < (1U << Log2SEW))
874 return std::nullopt;
875 switch (Log2SEW) {
876 case 3:
877 return RISCV::LB;
878 case 4:
879 return RISCV::LH;
880 case 5:
881 return RISCV::LW;
882 case 6:
883 return RISCV::LD;
884 default:
885 llvm_unreachable("Unexpected SEW");
886 }
887 }
888 case RISCV::VFMV_F_S: {
889 unsigned Log2SEW =
890 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
891 switch (Log2SEW) {
892 case 4:
893 return RISCV::FLH;
894 case 5:
895 return RISCV::FLW;
896 case 6:
897 return RISCV::FLD;
898 default:
899 llvm_unreachable("Unexpected SEW");
900 }
901 }
902 }
903}
904
905// This is the version used during InlineSpiller::spillAroundUses
// Attempt to fold a stack reload into MI by rewriting it as a single load
// (per getFoldedOpcode above) from FrameIndex; returns nullptr when no fold
// is possible.
// NOTE(review): the signature start (doxygen lines 906-907, presumably
// MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
//     MachineInstr &MI, ArrayRef<unsigned> Ops, ...) is missing from this
// extraction — confirm against upstream.
908 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
909 VirtRegMap *VRM) const {
910
911 std::optional<unsigned> LoadOpc = getFoldedOpcode(MF, MI, Ops, STI);
912 if (!LoadOpc)
913 return nullptr;
914 Register DstReg = MI.getOperand(0).getReg();
915 return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(*LoadOpc),
916 DstReg)
917 .addFrameIndex(FrameIndex)
918 .addImm(0);
919}
920
921static unsigned getLoadPredicatedOpcode(unsigned Opcode) {
922 switch (Opcode) {
923 case RISCV::LB:
924 return RISCV::PseudoCCLB;
925 case RISCV::LBU:
926 return RISCV::PseudoCCLBU;
927 case RISCV::LH:
928 return RISCV::PseudoCCLH;
929 case RISCV::LHU:
930 return RISCV::PseudoCCLHU;
931 case RISCV::LW:
932 return RISCV::PseudoCCLW;
933 case RISCV::LWU:
934 return RISCV::PseudoCCLWU;
935 case RISCV::LD:
936 return RISCV::PseudoCCLD;
937 case RISCV::QC_E_LB:
938 return RISCV::PseudoCCQC_E_LB;
939 case RISCV::QC_E_LBU:
940 return RISCV::PseudoCCQC_E_LBU;
941 case RISCV::QC_E_LH:
942 return RISCV::PseudoCCQC_E_LH;
943 case RISCV::QC_E_LHU:
944 return RISCV::PseudoCCQC_E_LHU;
945 case RISCV::QC_E_LW:
946 return RISCV::PseudoCCQC_E_LW;
947 default:
948 return 0;
949 }
950}
951
// Fold a load (LoadMI) into a PseudoCCMOVGPR conditional move by emitting the
// target's predicated-load pseudo: the load executes only when the branch
// condition selects that arm, otherwise the false register is kept. Requires
// the short-forward-branch-iload subtarget feature and that exactly one of
// the cmov's value operands (index 4 or 5) is being folded.
// NOTE(review): missing from this extraction — doxygen lines 952-954
// (signature start of this foldMemoryOperandImpl overload), 965 (presumably
// the MachineRegisterInfo &MRI declaration used below), and 984 (presumably
// the condition-code inversion inside the `if (!Invert)` — as written the
// guard reads inverted relative to its comment; confirm upstream).
955 LiveIntervals *LIS) const {
956 // For now, only handle RISCV::PseudoCCMOVGPR.
957 if (MI.getOpcode() != RISCV::PseudoCCMOVGPR)
958 return nullptr;
959
960 unsigned PredOpc = getLoadPredicatedOpcode(LoadMI.getOpcode());
961
962 if (!STI.hasShortForwardBranchILoad() || !PredOpc)
963 return nullptr;
964
966 if (Ops.size() != 1 || (Ops[0] != 4 && Ops[0] != 5))
967 return nullptr;
968
969 bool Invert = Ops[0] == 5;
970 const MachineOperand &FalseReg = MI.getOperand(!Invert ? 5 : 4);
971 Register DestReg = MI.getOperand(0).getReg();
972 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
973 if (!MRI.constrainRegClass(DestReg, PreviousClass))
974 return nullptr;
975
976 // Create a new predicated version of DefMI.
977 MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), InsertPt,
978 MI.getDebugLoc(), get(PredOpc), DestReg)
979 .add({MI.getOperand(1), MI.getOperand(2)});
980
981 // Add condition code, inverting if necessary.
982 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
983 if (!Invert)
985 NewMI.addImm(CC);
986
987 // Copy the false register.
988 NewMI.add(FalseReg);
989
990 // Copy all the DefMI operands.
991 const MCInstrDesc &DefDesc = LoadMI.getDesc();
992 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
993 NewMI.add(LoadMI.getOperand(i));
994
995 NewMI.cloneMemRefs(LoadMI);
996 return NewMI;
997}
998
// Materialize the 64-bit constant Val into DstReg by expanding the
// RISCVMatInt instruction sequence. On RV32 only values representable in 32
// bits are permitted (uimm32 is sign-extended first). Instructions after the
// first chain through DstReg as their source; dead/renamable flags are
// propagated, with the dead flag applied only to the final instruction.
// NOTE(review): missing from this extraction — doxygen lines 999-1000
// (signature start of RISCVInstrInfo::movImm), 1016 (presumably the
// `RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);`
// producing Seq used below), and the case labels at 1042/1049 (presumably
// RISCVMatInt::RegReg and RISCVMatInt::RegImm for the two unlabeled
// BuildMI arms) — confirm against upstream.
1001 const DebugLoc &DL, Register DstReg, uint64_t Val,
1002 MachineInstr::MIFlag Flag, bool DstRenamable,
1003 bool DstIsDead) const {
1004 Register SrcReg = RISCV::X0;
1005
1006 // For RV32, allow a sign or unsigned 32 bit value.
1007 if (!STI.is64Bit() && !isInt<32>(Val)) {
1008 // If have a uimm32 it will still fit in a register so we can allow it.
1009 if (!isUInt<32>(Val))
1010 report_fatal_error("Should only materialize 32-bit constants for RV32");
1011
1012 // Sign extend for generateInstSeq.
1013 Val = SignExtend64<32>(Val);
1014 }
1015
1017 assert(!Seq.empty());
1018
1019 bool SrcRenamable = false;
1020 unsigned Num = 0;
1021
1022 for (const RISCVMatInt::Inst &Inst : Seq) {
1023 bool LastItem = ++Num == Seq.size();
1024 RegState DstRegState = getDeadRegState(DstIsDead && LastItem) |
1025 getRenamableRegState(DstRenamable);
1026 RegState SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
1027 getRenamableRegState(SrcRenamable);
1028 switch (Inst.getOpndKind()) {
1029 case RISCVMatInt::Imm:
1030 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
1031 .addReg(DstReg, RegState::Define | DstRegState)
1032 .addImm(Inst.getImm())
1033 .setMIFlag(Flag);
1034 break;
1035 case RISCVMatInt::RegX0:
1036 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
1037 .addReg(DstReg, RegState::Define | DstRegState)
1038 .addReg(SrcReg, SrcRegState)
1039 .addReg(RISCV::X0)
1040 .setMIFlag(Flag);
1041 break;
1043 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
1044 .addReg(DstReg, RegState::Define | DstRegState)
1045 .addReg(SrcReg, SrcRegState)
1046 .addReg(SrcReg, SrcRegState)
1047 .setMIFlag(Flag);
1048 break;
1050 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
1051 .addReg(DstReg, RegState::Define | DstRegState)
1052 .addReg(SrcReg, SrcRegState)
1053 .addImm(Inst.getImm())
1054 .setMIFlag(Flag);
1055 break;
1056 }
1057
1058 // Only the first instruction has X0 as its source.
1059 SrcReg = DstReg;
1060 SrcRenamable = DstRenamable;
1061 }
1062}
1063
// Decode a conditional-branch opcode (base ISA plus the Xqci, CORE-V and
// Andes vendor branch variants) into the generic RISCVCC condition code it
// tests. Unknown opcodes yield COND_INVALID rather than asserting.
 1065 switch (Opc) {
 1066 default:
 1067 return RISCVCC::COND_INVALID;
 1068 case RISCV::BEQ:
 1069 case RISCV::BEQI:
 1070 case RISCV::CV_BEQIMM:
 1071 case RISCV::QC_BEQI:
 1072 case RISCV::QC_E_BEQI:
 1073 case RISCV::NDS_BBC:
 1074 case RISCV::NDS_BEQC:
 1075 return RISCVCC::COND_EQ;
 1076 case RISCV::BNE:
 1077 case RISCV::BNEI:
 1078 case RISCV::QC_BNEI:
 1079 case RISCV::QC_E_BNEI:
 1080 case RISCV::CV_BNEIMM:
 1081 case RISCV::NDS_BBS:
 1082 case RISCV::NDS_BNEC:
 1083 return RISCVCC::COND_NE;
 1084 case RISCV::BLT:
 1085 case RISCV::QC_BLTI:
 1086 case RISCV::QC_E_BLTI:
 1087 return RISCVCC::COND_LT;
 1088 case RISCV::BGE:
 1089 case RISCV::QC_BGEI:
 1090 case RISCV::QC_E_BGEI:
 1091 return RISCVCC::COND_GE;
 1092 case RISCV::BLTU:
 1093 case RISCV::QC_BLTUI:
 1094 case RISCV::QC_E_BLTUI:
 1095 return RISCVCC::COND_LTU;
 1096 case RISCV::BGEU:
 1097 case RISCV::QC_BGEUI:
 1098 case RISCV::QC_E_BGEUI:
 1099 return RISCVCC::COND_GEU;
 1100 }
 1101}
1102
// Constant-fold "C0 <cond> C1" for a known RISCVCC condition code.
// LTU/GEU reinterpret the operands as unsigned 64-bit values; all other
// comparisons are signed. Unexpected condition codes assert.
 1104 int64_t C1) {
 1105 switch (CC) {
 1106 default:
 1107 llvm_unreachable("Unexpected CC");
 1108 case RISCVCC::COND_EQ:
 1109 return C0 == C1;
 1110 case RISCVCC::COND_NE:
 1111 return C0 != C1;
 1112 case RISCVCC::COND_LT:
 1113 return C0 < C1;
 1114 case RISCVCC::COND_GE:
 1115 return C0 >= C1;
 1116 case RISCVCC::COND_LTU:
 1117 return (uint64_t)C0 < (uint64_t)C1;
 1118 case RISCVCC::COND_GEU:
 1119 return (uint64_t)C0 >= (uint64_t)C1;
 1120 }
 1121}
1122
 1123 // The contents of values added to Cond are not examined outside of
 1124 // RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
 1125 // push BranchOpcode, Reg1, Reg2.
 1128 // Block ends with fall-through condbranch.
 1129 assert(LastInst.getDesc().isConditionalBranch() &&
 1130 "Unknown conditional branch");
// Operand 2 is the branch target; operands 0 and 1 are the compared
// operands.
 1131 Target = LastInst.getOperand(2).getMBB();
// Cond = [opcode-as-imm, op0, op1]; this encoding is consumed by
// insertBranch and reverseBranchCondition below.
 1132 Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode()));
 1133 Cond.push_back(LastInst.getOperand(0));
 1134 Cond.push_back(LastInst.getOperand(1));
 1135}
1136
1137static unsigned getInverseXqcicmOpcode(unsigned Opcode) {
1138 switch (Opcode) {
1139 default:
1140 llvm_unreachable("Unexpected Opcode");
1141 case RISCV::QC_MVEQ:
1142 return RISCV::QC_MVNE;
1143 case RISCV::QC_MVNE:
1144 return RISCV::QC_MVEQ;
1145 case RISCV::QC_MVLT:
1146 return RISCV::QC_MVGE;
1147 case RISCV::QC_MVGE:
1148 return RISCV::QC_MVLT;
1149 case RISCV::QC_MVLTU:
1150 return RISCV::QC_MVGEU;
1151 case RISCV::QC_MVGEU:
1152 return RISCV::QC_MVLTU;
1153 case RISCV::QC_MVEQI:
1154 return RISCV::QC_MVNEI;
1155 case RISCV::QC_MVNEI:
1156 return RISCV::QC_MVEQI;
1157 case RISCV::QC_MVLTI:
1158 return RISCV::QC_MVGEI;
1159 case RISCV::QC_MVGEI:
1160 return RISCV::QC_MVLTI;
1161 case RISCV::QC_MVLTUI:
1162 return RISCV::QC_MVGEUI;
1163 case RISCV::QC_MVGEUI:
1164 return RISCV::QC_MVLTUI;
1165 }
1166}
1167
// Return the branch opcode implementing condition CC. SelectOpc selects the
// branch family: the default maps to the base-ISA B-type branches, while
// each Select_* pseudo maps to the matching vendor compare-with-immediate
// branch. Condition codes a family cannot express assert.
 1168 unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
 1169 switch (SelectOpc) {
 1170 default:
// Base ISA register-register branches.
 1171 switch (CC) {
 1172 default:
 1173 llvm_unreachable("Unexpected condition code!");
 1174 case RISCVCC::COND_EQ:
 1175 return RISCV::BEQ;
 1176 case RISCVCC::COND_NE:
 1177 return RISCV::BNE;
 1178 case RISCVCC::COND_LT:
 1179 return RISCV::BLT;
 1180 case RISCVCC::COND_GE:
 1181 return RISCV::BGE;
 1182 case RISCVCC::COND_LTU:
 1183 return RISCV::BLTU;
 1184 case RISCVCC::COND_GEU:
 1185 return RISCV::BGEU;
 1186 }
 1187 break;
 1188 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
 1189 switch (CC) {
 1190 default:
 1191 llvm_unreachable("Unexpected condition code!");
 1192 case RISCVCC::COND_EQ:
 1193 return RISCV::BEQI;
 1194 case RISCVCC::COND_NE:
 1195 return RISCV::BNEI;
 1196 }
 1197 break;
 1198 case RISCV::Select_GPR_Using_CC_SImm5_CV:
 1199 switch (CC) {
 1200 default:
 1201 llvm_unreachable("Unexpected condition code!");
 1202 case RISCVCC::COND_EQ:
 1203 return RISCV::CV_BEQIMM;
 1204 case RISCVCC::COND_NE:
 1205 return RISCV::CV_BNEIMM;
 1206 }
 1207 break;
 1208 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
 1209 switch (CC) {
 1210 default:
 1211 llvm_unreachable("Unexpected condition code!");
 1212 case RISCVCC::COND_EQ:
 1213 return RISCV::QC_BEQI;
 1214 case RISCVCC::COND_NE:
 1215 return RISCV::QC_BNEI;
 1216 case RISCVCC::COND_LT:
 1217 return RISCV::QC_BLTI;
 1218 case RISCVCC::COND_GE:
 1219 return RISCV::QC_BGEI;
 1220 }
 1221 break;
 1222 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
 1223 switch (CC) {
 1224 default:
 1225 llvm_unreachable("Unexpected condition code!");
 1226 case RISCVCC::COND_LTU:
 1227 return RISCV::QC_BLTUI;
 1228 case RISCVCC::COND_GEU:
 1229 return RISCV::QC_BGEUI;
 1230 }
 1231 break;
 1232 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
 1233 switch (CC) {
 1234 default:
 1235 llvm_unreachable("Unexpected condition code!");
 1236 case RISCVCC::COND_EQ:
 1237 return RISCV::QC_E_BEQI;
 1238 case RISCVCC::COND_NE:
 1239 return RISCV::QC_E_BNEI;
 1240 case RISCVCC::COND_LT:
 1241 return RISCV::QC_E_BLTI;
 1242 case RISCVCC::COND_GE:
 1243 return RISCV::QC_E_BGEI;
 1244 }
 1245 break;
 1246 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
 1247 switch (CC) {
 1248 default:
 1249 llvm_unreachable("Unexpected condition code!");
 1250 case RISCVCC::COND_LTU:
 1251 return RISCV::QC_E_BLTUI;
 1252 case RISCVCC::COND_GEU:
 1253 return RISCV::QC_E_BGEUI;
 1254 }
 1255 break;
 1256 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
 1257 switch (CC) {
 1258 default:
 1259 llvm_unreachable("Unexpected condition code!");
 1260 case RISCVCC::COND_EQ:
 1261 return RISCV::NDS_BBC;
 1262 case RISCVCC::COND_NE:
 1263 return RISCV::NDS_BBS;
 1264 }
 1265 break;
 1266 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
 1267 switch (CC) {
 1268 default:
 1269 llvm_unreachable("Unexpected condition code!");
 1270 case RISCVCC::COND_EQ:
 1271 return RISCV::NDS_BEQC;
 1272 case RISCVCC::COND_NE:
 1273 return RISCV::NDS_BNEC;
 1274 }
 1275 break;
 1276 }
// Unreachable: every inner switch either returns or hits llvm_unreachable,
// so the break statements above are dead and control never reaches here.
 1277}
1278
// Return the logical negation of a RISCVCC condition code (EQ<->NE,
// LT<->GE, LTU<->GEU). Asserts on anything else.
 1280 switch (CC) {
 1281 default:
 1282 llvm_unreachable("Unrecognized conditional branch");
 1283 case RISCVCC::COND_EQ:
 1284 return RISCVCC::COND_NE;
 1285 case RISCVCC::COND_NE:
 1286 return RISCVCC::COND_EQ;
 1287 case RISCVCC::COND_LT:
 1288 return RISCVCC::COND_GE;
 1289 case RISCVCC::COND_GE:
 1290 return RISCVCC::COND_LT;
 1291 case RISCVCC::COND_LTU:
 1292 return RISCVCC::COND_GEU;
 1293 case RISCVCC::COND_GEU:
 1294 return RISCVCC::COND_LTU;
 1295 }
 1296}
1297
// Standard TargetInstrInfo::analyzeBranch contract: examine the block's
// terminators and, when understood, fill in TBB/FBB/Cond and return false;
// return true when the terminator sequence cannot be analyzed.
 1300 MachineBasicBlock *&FBB,
 1302 bool AllowModify) const {
 1303 TBB = FBB = nullptr;
 1304 Cond.clear();
 1305
 1306 // If the block has no terminators, it just falls into the block after it.
 1307 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
 1308 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
 1309 return false;
 1310
 1311 // Count the number of terminators and find the first unconditional or
 1312 // indirect branch.
 1313 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
 1314 int NumTerminators = 0;
 1315 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
 1316 J++) {
 1317 NumTerminators++;
 1318 if (J->getDesc().isUnconditionalBranch() ||
 1319 J->getDesc().isIndirectBranch()) {
 1320 FirstUncondOrIndirectBr = J.getReverse();
 1321 }
 1322 }
 1323
 1324 // If AllowModify is true, we can erase any terminators after
 1325 // FirstUncondOrIndirectBR.
 1326 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
 1327 while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
 1328 std::next(FirstUncondOrIndirectBr)->eraseFromParent();
 1329 NumTerminators--;
 1330 }
 1331 I = FirstUncondOrIndirectBr;
 1332 }
 1333
 1334 // We can't handle blocks that end in an indirect branch.
 1335 if (I->getDesc().isIndirectBranch())
 1336 return true;
 1337
 1338 // We can't handle Generic branch opcodes from Global ISel.
 1339 if (I->isPreISelOpcode())
 1340 return true;
 1341
 1342 // We can't handle blocks with more than 2 terminators.
 1343 if (NumTerminators > 2)
 1344 return true;
 1345
 1346 // Handle a single unconditional branch.
 1347 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
// NOTE(review): the line assigning TBB (presumably via getBranchDestBlock)
// was elided in this dump — confirm against upstream.
 1349 return false;
 1350 }
 1351
 1352 // Handle a single conditional branch.
 1353 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
// NOTE(review): the parseCondBranch call filling TBB/Cond was elided in
// this dump — confirm against upstream.
 1355 return false;
 1356 }
 1357
 1358 // Handle a conditional branch followed by an unconditional branch.
 1359 if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
 1360 I->getDesc().isUnconditionalBranch()) {
 1361 parseCondBranch(*std::prev(I), TBB, Cond);
 1362 FBB = getBranchDestBlock(*I);
 1363 return false;
 1364 }
 1365
 1366 // Otherwise, we can't handle this.
 1367 return true;
 1368}
1369
// Standard TargetInstrInfo::removeBranch: erase up to two trailing branches
// (an unconditional and/or a conditional one) from MBB, returning how many
// were removed and accumulating their byte size into *BytesRemoved when
// requested.
 1371 int *BytesRemoved) const {
 1372 if (BytesRemoved)
 1373 *BytesRemoved = 0;
 1374 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
 1375 if (I == MBB.end())
 1376 return 0;
 1377
 1378 if (!I->getDesc().isUnconditionalBranch() &&
 1379 !I->getDesc().isConditionalBranch())
 1380 return 0;
 1381
 1382 // Remove the branch.
 1383 if (BytesRemoved)
 1384 *BytesRemoved += getInstSizeInBytes(*I);
 1385 I->eraseFromParent();
 1386
 1387 I = MBB.end();
 1388
 1389 if (I == MBB.begin())
 1390 return 1;
 1391 --I;
// A conditional branch may precede the branch just removed (the two-way
// conditional layout produced by insertBranch); remove it as well.
 1392 if (!I->getDesc().isConditionalBranch())
 1393 return 1;
 1394
 1395 // Remove the branch.
 1396 if (BytesRemoved)
 1397 *BytesRemoved += getInstSizeInBytes(*I);
 1398 I->eraseFromParent();
 1399 return 2;
 1400}
1401
 1402 // Inserts a branch into the end of the specific MachineBasicBlock, returning
 1403 // the number of instructions inserted.
// Cond is either empty (unconditional) or the 3-operand encoding produced
// by parseCondBranch: [branch opcode, LHS, RHS]. Byte sizes of inserted
// instructions are accumulated into *BytesAdded when requested.
 1406 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
 1407 if (BytesAdded)
 1408 *BytesAdded = 0;
 1409
 1410 // Shouldn't be a fall through.
 1411 assert(TBB && "insertBranch must not be told to insert a fallthrough");
 1412 assert((Cond.size() == 3 || Cond.size() == 0) &&
 1413 "RISC-V branch conditions have two components!");
 1414
 1415 // Unconditional branch.
 1416 if (Cond.empty()) {
 1417 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
 1418 if (BytesAdded)
 1419 *BytesAdded += getInstSizeInBytes(MI);
 1420 return 1;
 1421 }
 1422
 1423 // Either a one or two-way conditional branch.
 1424 MachineInstr &CondMI = *BuildMI(&MBB, DL, get(Cond[0].getImm()))
 1425 .add(Cond[1])
 1426 .add(Cond[2])
 1427 .addMBB(TBB);
 1428 if (BytesAdded)
 1429 *BytesAdded += getInstSizeInBytes(CondMI);
 1430
 1431 // One-way conditional branch.
 1432 if (!FBB)
 1433 return 1;
 1434
 1435 // Two-way conditional branch.
 1436 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
 1437 if (BytesAdded)
 1438 *BytesAdded += getInstSizeInBytes(MI);
 1439 return 2;
 1440}
1441
// Expand an out-of-range unconditional branch in the (new, empty) block MBB
// into a PseudoJump through a scratch register. If no register can be
// scavenged, a callee-saved register is spilled around the jump via
// RestoreBB.
// NOTE(review): the declarations of MRI/TRI/RVFI (and the first signature
// line) were elided in this dump — presumably taken from MF as in upstream;
// confirm.
 1443 MachineBasicBlock &DestBB,
 1444 MachineBasicBlock &RestoreBB,
 1445 const DebugLoc &DL, int64_t BrOffset,
 1446 RegScavenger *RS) const {
 1447 assert(RS && "RegScavenger required for long branching");
 1448 assert(MBB.empty() &&
 1449 "new block should be inserted for expanding unconditional branch");
 1450 assert(MBB.pred_size() == 1);
 1451 assert(RestoreBB.empty() &&
 1452 "restore block should be inserted for restoring clobbered registers");
 1453
 1454 MachineFunction *MF = MBB.getParent();
 1458
 1459 if (!isInt<32>(BrOffset))
 1461 "Branch offsets outside of the signed 32-bit range not supported");
 1462
 1463 // FIXME: A virtual register must be used initially, as the register
 1464 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
 1465 // uses the same workaround).
 1466 Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
 1467 auto II = MBB.end();
 1468 // We may also update the jump target to RestoreBB later.
 1469 MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
 1470 .addReg(ScratchReg, RegState::Define | RegState::Dead)
 1471 .addMBB(&DestBB, RISCVII::MO_CALL);
 1472
 1473 RS->enterBasicBlockEnd(MBB);
// With Zicfilp, the landing-pad scheme restricts which register may hold
// the jump target, so scavenge specifically from the X7 class.
 1474 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
 1475 if (STI.hasStdExtZicfilp())
 1476 RC = &RISCV::GPRX7RegClass;
 1477 Register TmpGPR =
 1478 RS->scavengeRegisterBackwards(*RC, MI.getIterator(),
 1479 /*RestoreAfter=*/false, /*SpAdj=*/0,
 1480 /*AllowSpill=*/false);
 1481 if (TmpGPR.isValid())
 1482 RS->setRegUsed(TmpGPR);
 1483 else {
 1484 // The case when there is no scavenged register needs special handling.
 1485
 1486 // Pick s11(or s1 for rve) because it doesn't make a difference.
 1487 TmpGPR = STI.hasStdExtE() ? RISCV::X9 : RISCV::X27;
 1488 // Force t2 if Zicfilp is on
 1489 if (STI.hasStdExtZicfilp())
 1490 TmpGPR = RISCV::X7;
 1491
 1492 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
 1493 if (FrameIndex == -1)
 1494 report_fatal_error("underestimated function size");
 1495
// Spill the chosen register before the jump and reload it in RestoreBB,
// retargeting the jump so the reload executes on the destination path.
 1496 storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
 1497 &RISCV::GPRRegClass, Register());
 1498 TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
 1499 /*SpAdj=*/0, /*FIOperandNum=*/1);
 1500
 1501 MI.getOperand(1).setMBB(&RestoreBB);
 1502
 1503 loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
 1504 &RISCV::GPRRegClass, Register());
 1505 TRI->eliminateFrameIndex(RestoreBB.back(),
 1506 /*SpAdj=*/0, /*FIOperandNum=*/1);
 1507 }
 1508
 1509 MRI.replaceRegWith(ScratchReg, TmpGPR);
 1510 MRI.clearVirtRegs();
 1511}
1512
// Invert the branch condition encoded by parseCondBranch: Cond[0] holds the
// branch opcode as an immediate, which is swapped with the opcode testing
// the opposite condition. Returns false, meaning the condition was
// successfully reversed.
 1515 assert((Cond.size() == 3) && "Invalid branch condition!");
 1516 switch (Cond[0].getImm()) {
 1517 default:
 1518 llvm_unreachable("Unknown conditional branch!");
 1519 case RISCV::BEQ:
 1520 Cond[0].setImm(RISCV::BNE);
 1521 break;
 1522 case RISCV::BEQI:
 1523 Cond[0].setImm(RISCV::BNEI);
 1524 break;
 1525 case RISCV::BNE:
 1526 Cond[0].setImm(RISCV::BEQ);
 1527 break;
 1528 case RISCV::BNEI:
 1529 Cond[0].setImm(RISCV::BEQI);
 1530 break;
 1531 case RISCV::BLT:
 1532 Cond[0].setImm(RISCV::BGE);
 1533 break;
 1534 case RISCV::BGE:
 1535 Cond[0].setImm(RISCV::BLT);
 1536 break;
 1537 case RISCV::BLTU:
 1538 Cond[0].setImm(RISCV::BGEU);
 1539 break;
 1540 case RISCV::BGEU:
 1541 Cond[0].setImm(RISCV::BLTU);
 1542 break;
 1543 case RISCV::CV_BEQIMM:
 1544 Cond[0].setImm(RISCV::CV_BNEIMM);
 1545 break;
 1546 case RISCV::CV_BNEIMM:
 1547 Cond[0].setImm(RISCV::CV_BEQIMM);
 1548 break;
 1549 case RISCV::QC_BEQI:
 1550 Cond[0].setImm(RISCV::QC_BNEI);
 1551 break;
 1552 case RISCV::QC_BNEI:
 1553 Cond[0].setImm(RISCV::QC_BEQI);
 1554 break;
 1555 case RISCV::QC_BGEI:
 1556 Cond[0].setImm(RISCV::QC_BLTI);
 1557 break;
 1558 case RISCV::QC_BLTI:
 1559 Cond[0].setImm(RISCV::QC_BGEI);
 1560 break;
 1561 case RISCV::QC_BGEUI:
 1562 Cond[0].setImm(RISCV::QC_BLTUI);
 1563 break;
 1564 case RISCV::QC_BLTUI:
 1565 Cond[0].setImm(RISCV::QC_BGEUI);
 1566 break;
 1567 case RISCV::QC_E_BEQI:
 1568 Cond[0].setImm(RISCV::QC_E_BNEI);
 1569 break;
 1570 case RISCV::QC_E_BNEI:
 1571 Cond[0].setImm(RISCV::QC_E_BEQI);
 1572 break;
 1573 case RISCV::QC_E_BGEI:
 1574 Cond[0].setImm(RISCV::QC_E_BLTI);
 1575 break;
 1576 case RISCV::QC_E_BLTI:
 1577 Cond[0].setImm(RISCV::QC_E_BGEI);
 1578 break;
 1579 case RISCV::QC_E_BGEUI:
 1580 Cond[0].setImm(RISCV::QC_E_BLTUI);
 1581 break;
 1582 case RISCV::QC_E_BLTUI:
 1583 Cond[0].setImm(RISCV::QC_E_BGEUI);
 1584 break;
 1585 case RISCV::NDS_BBC:
 1586 Cond[0].setImm(RISCV::NDS_BBS);
 1587 break;
 1588 case RISCV::NDS_BBS:
 1589 Cond[0].setImm(RISCV::NDS_BBC);
 1590 break;
 1591 case RISCV::NDS_BEQC:
 1592 Cond[0].setImm(RISCV::NDS_BNEC);
 1593 break;
 1594 case RISCV::NDS_BNEC:
 1595 Cond[0].setImm(RISCV::NDS_BEQC);
 1596 break;
 1597 }
 1598
 1599 return false;
 1600}
1601
1602// Return true if the instruction is a load immediate instruction (i.e.
1603// (ADDI x0, imm) or (BSETI x0, imm)).
1604static bool isLoadImm(const MachineInstr *MI, int64_t &Imm) {
1605 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1606 MI->getOperand(1).getReg() == RISCV::X0) {
1607 Imm = MI->getOperand(2).getImm();
1608 return true;
1609 }
1610 // BSETI can be used to create power of 2 constants. Only 2048 is currently
1611 // interesting because it is 1 more than the maximum ADDI constant.
1612 if (MI->getOpcode() == RISCV::BSETI && MI->getOperand(1).isReg() &&
1613 MI->getOperand(1).getReg() == RISCV::X0 &&
1614 MI->getOperand(2).getImm() == 11) {
1615 Imm = 2048;
1616 return true;
1617 }
1618 return false;
1619}
1620
// Return true if operand Op carries a known constant: either the X0
// register (constant 0) or a virtual register defined by a load-immediate
// instruction recognized by isLoadImm. The constant is stored in Imm.
 1622 const MachineOperand &Op, int64_t &Imm) {
 1623 // Either a load from immediate instruction or X0.
 1624 if (!Op.isReg())
 1625 return false;
 1626
 1627 Register Reg = Op.getReg();
 1628 if (Reg == RISCV::X0) {
 1629 Imm = 0;
 1630 return true;
 1631 }
 1632 return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
 1633}
1634
// Body of RISCVInstrInfo::optimizeCondBranch: when the compared operands
// are known constants (X0 or li/bseti results), either constant-fold the
// branch into an always/never-taken beqz/bnez form, or retarget a
// signed/unsigned compare to reuse an already-materialized adjacent
// constant. Returns true if the branch was rewritten.
 1636 bool IsSigned = false;
 1637 bool IsEquality = false;
 1638 switch (MI.getOpcode()) {
 1639 default:
 1640 return false;
 1641 case RISCV::BEQ:
 1642 case RISCV::BNE:
 1643 IsEquality = true;
 1644 break;
 1645 case RISCV::BGE:
 1646 case RISCV::BLT:
 1647 IsSigned = true;
 1648 break;
 1649 case RISCV::BGEU:
 1650 case RISCV::BLTU:
 1651 break;
 1652 }
 1653
 1654 MachineBasicBlock *MBB = MI.getParent();
 1655 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
 1656
 1657 const MachineOperand &LHS = MI.getOperand(0);
 1658 const MachineOperand &RHS = MI.getOperand(1);
 1659 MachineBasicBlock *TBB = MI.getOperand(2).getMBB();
 1660
 1661 RISCVCC::CondCode CC = getCondFromBranchOpc(MI.getOpcode());
 1663
 1664 // Canonicalize conditional branches which can be constant folded into
 1665 // beqz or bnez. We can't modify the CFG here.
 1666 int64_t C0, C1;
 1667 if (isFromLoadImm(MRI, LHS, C0) && isFromLoadImm(MRI, RHS, C1)) {
// x0 == x0 is always true, so BEQ x0,x0 is an always-taken branch and
// BNE x0,x0 is never taken.
 1668 unsigned NewOpc = evaluateCondBranch(CC, C0, C1) ? RISCV::BEQ : RISCV::BNE;
 1669 // Build the new branch and remove the old one.
 1670 BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
 1671 .addReg(RISCV::X0)
 1672 .addReg(RISCV::X0)
 1673 .addMBB(TBB);
 1674 MI.eraseFromParent();
 1675 return true;
 1676 }
 1677
 1678 if (IsEquality)
 1679 return false;
 1680
 1681 // For two constants C0 and C1 from
 1682 // ```
 1683 // li Y, C0
 1684 // li Z, C1
 1685 // ```
 1686 // 1. if C1 = C0 + 1
 1687 // we can turn:
 1688 // (a) blt Y, X -> bge X, Z
 1689 // (b) bge Y, X -> blt X, Z
 1690 //
 1691 // 2. if C1 = C0 - 1
 1692 // we can turn:
 1693 // (a) blt X, Y -> bge Z, X
 1694 // (b) bge X, Y -> blt Z, X
 1695 //
 1696 // To make sure this optimization is really beneficial, we only
 1697 // optimize for cases where Y had only one use (i.e. only used by the branch).
 1698 // Try to find the register for constant Z; return
 1699 // invalid register otherwise.
 1700 auto searchConst = [&](int64_t C1) -> Register {
// NOTE(review): the iterator setup line (declaring II/E) was elided in this
// dump — presumably a reverse scan of the block starting at MI; confirm
// against upstream.
 1702 auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
 1703 int64_t Imm;
 1704 return isLoadImm(&I, Imm) && Imm == C1 &&
 1705 I.getOperand(0).getReg().isVirtual();
 1706 });
 1707 if (DefC1 != E)
 1708 return DefC1->getOperand(0).getReg();
 1709
 1710 return Register();
 1711 };
 1712
// Operands will be swapped below, so the inverted condition preserves the
// branch's meaning.
 1713 unsigned NewOpc = RISCVCC::getBrCond(getInverseBranchCondition(CC));
 1714
 1715 // Might be case 1.
 1716 // Don't change 0 to 1 since we can use x0.
 1717 // For unsigned cases changing -1U to 0 would be incorrect.
 1718 // The incorrect case for signed would be INT_MAX, but isFromLoadImm can't
 1719 // return that.
 1720 if (isFromLoadImm(MRI, LHS, C0) && C0 != 0 && LHS.getReg().isVirtual() &&
 1721 MRI.hasOneUse(LHS.getReg()) && (IsSigned || C0 != -1)) {
 1722 assert((isInt<12>(C0) || C0 == 2048) && "Unexpected immediate");
 1723 if (Register RegZ = searchConst(C0 + 1)) {
 1724 BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
 1725 .add(RHS)
 1726 .addReg(RegZ)
 1727 .addMBB(TBB);
 1728 // We might extend the live range of Z, clear its kill flag to
 1729 // account for this.
 1730 MRI.clearKillFlags(RegZ);
 1731 MI.eraseFromParent();
 1732 return true;
 1733 }
 1734 }
 1735
 1736 // Might be case 2.
 1737 // For signed cases we don't want to change 0 since we can use x0.
 1738 // For unsigned cases changing 0 to -1U would be incorrect.
 1739 // The incorrect case for signed would be INT_MIN, but isFromLoadImm can't
 1740 // return that.
 1741 if (isFromLoadImm(MRI, RHS, C0) && C0 != 0 && RHS.getReg().isVirtual() &&
 1742 MRI.hasOneUse(RHS.getReg())) {
 1743 assert((isInt<12>(C0) || C0 == 2048) && "Unexpected immediate");
 1744 if (Register RegZ = searchConst(C0 - 1)) {
 1745 BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
 1746 .addReg(RegZ)
 1747 .add(LHS)
 1748 .addMBB(TBB);
 1749 // We might extend the live range of Z, clear its kill flag to
 1750 // account for this.
 1751 MRI.clearKillFlags(RegZ)
 1752 MI.eraseFromParent();
 1753 return true;
 1754 }
 1755 }
 1756
 1757 return false;
 1758}
1759
// Return the destination basic block of a (possibly pseudo) branch
// instruction; by convention the target MBB is its last explicit operand.
 1762 assert(MI.getDesc().isBranch() && "Unexpected opcode!");
 1763 // The branch target is always the last operand.
 1764 int NumOp = MI.getNumExplicitOperands();
 1765 return MI.getOperand(NumOp - 1).getMBB();
 1766}
1767
// Return true if BrOffset fits in the immediate field of the given branch
// opcode: 11 bits for the Andes branches, 13 bits for conditional branches,
// 21 bits for JAL/PseudoBR, and 32 bits (XLen sign-extended, rounded via
// the +0x800 AUIPC trick) for PseudoJump.
 1769 int64_t BrOffset) const {
 1770 unsigned XLen = STI.getXLen();
 1771 // Ideally we could determine the supported branch offset from the
 1772 // RISCVII::FormMask, but this can't be used for Pseudo instructions like
 1773 // PseudoBR.
 1774 switch (BranchOp) {
 1775 default:
 1776 llvm_unreachable("Unexpected opcode!");
 1777 case RISCV::NDS_BBC:
 1778 case RISCV::NDS_BBS:
 1779 case RISCV::NDS_BEQC:
 1780 case RISCV::NDS_BNEC:
 1781 return isInt<11>(BrOffset);
 1782 case RISCV::BEQ:
 1783 case RISCV::BNE:
 1784 case RISCV::BLT:
 1785 case RISCV::BGE:
 1786 case RISCV::BLTU:
 1787 case RISCV::BGEU:
 1788 case RISCV::BEQI:
 1789 case RISCV::BNEI:
 1790 case RISCV::CV_BEQIMM:
 1791 case RISCV::CV_BNEIMM:
 1792 case RISCV::QC_BEQI:
 1793 case RISCV::QC_BNEI:
 1794 case RISCV::QC_BGEI:
 1795 case RISCV::QC_BLTI:
 1796 case RISCV::QC_BLTUI:
 1797 case RISCV::QC_BGEUI:
 1798 case RISCV::QC_E_BEQI:
 1799 case RISCV::QC_E_BNEI:
 1800 case RISCV::QC_E_BGEI:
 1801 case RISCV::QC_E_BLTI:
 1802 case RISCV::QC_E_BLTUI:
 1803 case RISCV::QC_E_BGEUI:
 1804 return isInt<13>(BrOffset);
 1805 case RISCV::JAL:
 1806 case RISCV::PseudoBR:
 1807 return isInt<21>(BrOffset);
 1808 case RISCV::PseudoJump:
 1809 return isInt<32>(SignExtend64(BrOffset + 0x800, XLen));
 1810 }
 1811}
1812
1813// If the operation has a predicated pseudo instruction, return the pseudo
1814// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1815// TODO: Support more operations.
1816unsigned getPredicatedOpcode(unsigned Opcode) {
1817 // clang-format off
1818 switch (Opcode) {
1819 case RISCV::ADD: return RISCV::PseudoCCADD;
1820 case RISCV::SUB: return RISCV::PseudoCCSUB;
1821 case RISCV::SLL: return RISCV::PseudoCCSLL;
1822 case RISCV::SRL: return RISCV::PseudoCCSRL;
1823 case RISCV::SRA: return RISCV::PseudoCCSRA;
1824 case RISCV::AND: return RISCV::PseudoCCAND;
1825 case RISCV::OR: return RISCV::PseudoCCOR;
1826 case RISCV::XOR: return RISCV::PseudoCCXOR;
1827 case RISCV::MAX: return RISCV::PseudoCCMAX;
1828 case RISCV::MAXU: return RISCV::PseudoCCMAXU;
1829 case RISCV::MIN: return RISCV::PseudoCCMIN;
1830 case RISCV::MINU: return RISCV::PseudoCCMINU;
1831 case RISCV::MUL: return RISCV::PseudoCCMUL;
1832 case RISCV::LUI: return RISCV::PseudoCCLUI;
1833 case RISCV::QC_LI: return RISCV::PseudoCCQC_LI;
1834 case RISCV::QC_E_LI: return RISCV::PseudoCCQC_E_LI;
1835
1836 case RISCV::ADDI: return RISCV::PseudoCCADDI;
1837 case RISCV::SLLI: return RISCV::PseudoCCSLLI;
1838 case RISCV::SRLI: return RISCV::PseudoCCSRLI;
1839 case RISCV::SRAI: return RISCV::PseudoCCSRAI;
1840 case RISCV::ANDI: return RISCV::PseudoCCANDI;
1841 case RISCV::ORI: return RISCV::PseudoCCORI;
1842 case RISCV::XORI: return RISCV::PseudoCCXORI;
1843
1844 case RISCV::ADDW: return RISCV::PseudoCCADDW;
1845 case RISCV::SUBW: return RISCV::PseudoCCSUBW;
1846 case RISCV::SLLW: return RISCV::PseudoCCSLLW;
1847 case RISCV::SRLW: return RISCV::PseudoCCSRLW;
1848 case RISCV::SRAW: return RISCV::PseudoCCSRAW;
1849
1850 case RISCV::ADDIW: return RISCV::PseudoCCADDIW;
1851 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW;
1852 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW;
1853 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW;
1854
1855 case RISCV::ANDN: return RISCV::PseudoCCANDN;
1856 case RISCV::ORN: return RISCV::PseudoCCORN;
1857 case RISCV::XNOR: return RISCV::PseudoCCXNOR;
1858
1859 case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS;
1860 case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ;
1861 }
1862 // clang-format on
1863
1864 return RISCV::INSTRUCTION_LIST_END;
1865}
1866
 1867 /// Identify instructions that can be folded into a CCMOV instruction, and
 1868 /// return the defining instruction.
/// \p Reg must be a virtual register with a single non-debug use whose
/// single def (a) has a predicated pseudo per getPredicatedOpcode, (b) is
/// gated on the subtarget's short-forward-branch features for min/max and
/// mul, (c) has no tied operands, extra defs, frame/CP/JT operands or
/// non-constant physreg uses, and (d) is safe to move (not across stores).
/// Returns nullptr when any condition fails.
 1870 const MachineRegisterInfo &MRI,
 1871 const TargetInstrInfo *TII,
 1872 const RISCVSubtarget &STI) {
 1873 if (!Reg.isVirtual())
 1874 return nullptr;
 1875 if (!MRI.hasOneNonDBGUse(Reg))
 1876 return nullptr;
 1877 MachineInstr *MI = MRI.getVRegDef(Reg);
 1878 if (!MI)
 1879 return nullptr;
 1880
 1881 if (!STI.hasShortForwardBranchIMinMax() &&
 1882 (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN ||
 1883 MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
 1884 return nullptr;
 1885
 1886 if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL)
 1887 return nullptr;
 1888
 1889 // Check if MI can be predicated and folded into the CCMOV.
 1890 if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
 1891 return nullptr;
 1892 // Don't predicate li idiom.
 1893 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
 1894 MI->getOperand(1).getReg() == RISCV::X0)
 1895 return nullptr;
 1896 // Check if MI has any other defs or physreg uses.
 1897 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
 1898 // Reject frame index operands, PEI can't handle the predicated pseudos.
 1899 if (MO.isFI() || MO.isCPI() || MO.isJTI())
 1900 return nullptr;
 1901 if (!MO.isReg())
 1902 continue;
 1903 // MI can't have any tied operands, that would conflict with predication.
 1904 if (MO.isTied())
 1905 return nullptr;
 1906 if (MO.isDef())
 1907 return nullptr;
 1908 // Allow constant physregs.
 1909 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
 1910 return nullptr;
 1911 }
 1912 bool DontMoveAcrossStores = true;
 1913 if (!MI->isSafeToMove(DontMoveAcrossStores))
 1914 return nullptr;
 1915 return MI;
 1916}
1917
// Body of RISCVInstrInfo::optimizeSelect for PseudoCCMOVGPR: fold the
// single-use def of the true (or, with inversion, false) operand into a
// predicated PseudoCC* instruction, replacing the select. Returns the new
// instruction, or nullptr if no operand can be folded. The caller erases
// MI; DefMI is erased here.
 1921 bool PreferFalse) const {
 1922 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
 1923 "Unknown select instruction");
 1924 if (!STI.hasShortForwardBranchIALU())
 1925 return nullptr;
 1926
 1927 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
// NOTE(review): the declaration of DefMI was elided in this dump —
// presumably "MachineInstr *DefMI ="; confirm against upstream.
 1929 canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI);
// Prefer folding the true-side def (operand 5); fall back to the false
// side (operand 4) and remember to invert the condition.
 1930 bool Invert = !DefMI;
 1931 if (!DefMI)
 1932 DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI);
 1933 if (!DefMI)
 1934 return nullptr;
 1935
 1936 // Find new register class to use.
 1937 MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
 1938 Register DestReg = MI.getOperand(0).getReg();
 1939 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
 1940 if (!MRI.constrainRegClass(DestReg, PreviousClass))
 1941 return nullptr;
 1942
 1943 unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
 1944 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
 1945
 1946 // Create a new predicated version of DefMI.
 1947 MachineInstrBuilder NewMI =
 1948 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
 1949
 1950 // Copy the condition portion.
 1951 NewMI.add(MI.getOperand(1));
 1952 NewMI.add(MI.getOperand(2));
 1953
 1954 // Add condition code, inverting if necessary.
 1955 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
 1956 if (Invert)
// NOTE(review): the condition-inversion assignment was elided in this dump
// — presumably CC = getInverseBranchCondition(CC); confirm.
 1958 NewMI.addImm(CC);
 1959
 1960 // Copy the false register.
 1961 NewMI.add(FalseReg);
 1962
 1963 // Copy all the DefMI operands.
 1964 const MCInstrDesc &DefDesc = DefMI->getDesc();
 1965 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
 1966 NewMI.add(DefMI->getOperand(i));
 1967
 1968 // Update SeenMIs set: register newly created MI and erase removed DefMI.
 1969 SeenMIs.insert(NewMI);
 1970 SeenMIs.erase(DefMI);
 1971
 1972 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
 1973 // DefMI would be invalid when transferred inside the loop. Checking for a
 1974 // loop is expensive, but at least remove kill flags if they are in different
 1975 // BBs.
 1976 if (DefMI->getParent() != MI.getParent())
 1977 NewMI->clearKillInfo();
 1978
 1979 // The caller will erase MI, but not DefMI.
 1980 DefMI->eraseFromParent();
 1981 return NewMI;
 1982}
1983
// Body of RISCVInstrInfo::getInstSizeInBytes: compute the encoded byte size
// of MI, accounting for inline asm, NTL-hint prefixes, bundles,
// compressible instructions and the patchable/statepoint pseudos.
 1985 if (MI.isMetaInstruction())
 1986 return 0;
 1987
 1988 unsigned Opcode = MI.getOpcode();
 1989
 1990 if (Opcode == TargetOpcode::INLINEASM ||
 1991 Opcode == TargetOpcode::INLINEASM_BR) {
 1992 const MachineFunction &MF = *MI.getParent()->getParent();
 1993 return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
 1994 *MF.getTarget().getMCAsmInfo());
 1995 }
 1996
// Non-temporal-locality accesses are emitted as an ntl.all prefix plus the
// memory access itself; size depends on which of the pair compress.
 1997 if (requiresNTLHint(MI)) {
 1998 if (STI.hasStdExtZca()) {
 1999 if (isCompressibleInst(MI, STI))
 2000 return 4; // c.ntl.all + c.load/c.store
 2001 return 6; // c.ntl.all + load/store
 2002 }
 2003 return 8; // ntl.all + load/store
 2004 }
 2005
 2006 if (Opcode == TargetOpcode::BUNDLE)
 2007 return getInstBundleLength(MI);
 2008
 2009 if (MI.getParent() && MI.getParent()->getParent()) {
 2010 if (isCompressibleInst(MI, STI))
 2011 return 2;
 2012 }
 2013
 2014 switch (Opcode) {
 2015 case RISCV::PseudoMV_FPR16INX:
 2016 case RISCV::PseudoMV_FPR32INX:
 2017 // MV is always compressible to either c.mv or c.li rd, 0.
 2018 return STI.hasStdExtZca() ? 2 : 4;
 2019 case TargetOpcode::STACKMAP:
 2020 // The upper bound for a stackmap intrinsic is the full length of its shadow
// NOTE(review): the return expressions for STACKMAP and PATCHPOINT were
// elided in this dump — presumably returning the shadow/requested byte
// counts; confirm against upstream.
 2022 case TargetOpcode::PATCHPOINT:
 2023 // The size of the patchpoint intrinsic is the number of bytes requested
 2025 case TargetOpcode::STATEPOINT: {
 2026 // The size of the statepoint intrinsic is the number of bytes requested
 2027 unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
 2028 // No patch bytes means at most a PseudoCall is emitted
 2029 return std::max(NumBytes, 8U);
 2030 }
 2031 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
 2032 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
 2033 case TargetOpcode::PATCHABLE_TAIL_CALL: {
 2034 const MachineFunction &MF = *MI.getParent()->getParent();
 2035 const Function &F = MF.getFunction();
 2036 if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER &&
 2037 F.hasFnAttribute("patchable-function-entry")) {
 2038 unsigned Num;
 2039 if (F.getFnAttribute("patchable-function-entry")
 2040 .getValueAsString()
 2041 .getAsInteger(10, Num))
 2042 return get(Opcode).getSize();
 2043
 2044 // Number of C.NOP or NOP
 2045 return (STI.hasStdExtZca() ? 2 : 4) * Num;
 2046 }
 2047 // XRay uses C.JAL + 21 or 33 C.NOP for each sled in RV32 and RV64,
 2048 // respectively.
 2049 return STI.is64Bit() ? 68 : 44;
 2050 }
 2051 default:
 2052 return get(Opcode).getSize();
 2053 }
 2054}
2055
// Sum the byte sizes of all instructions inside the bundle headed by MI.
// NOTE(review): the iterator-I declaration and the Size accumulation line
// were elided in this dump — presumably I = MI.getIterator() and
// Size += getInstSizeInBytes(*I); confirm against upstream.
 2056 unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
 2057 unsigned Size = 0;
 2059 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
 2060 while (++I != E && I->isInsideBundle()) {
 2061 assert(!I->isBundle() && "No nested bundle!");
 2063 }
 2064 return Size;
 2065}
2066
// Body of (presumably — the signature line is elided in this dump)
// RISCVInstrInfo::isAsCheapAsAMove: treat canonical FP sign-injection
// moves (fsgnj rd, rs, rs) and ALU-immediate no-op/li forms as
// move-equivalent; defer to the generic flag otherwise.
 2068 const unsigned Opcode = MI.getOpcode();
 2069 switch (Opcode) {
 2070 default:
 2071 break;
 2072 case RISCV::FSGNJ_D:
 2073 case RISCV::FSGNJ_S:
 2074 case RISCV::FSGNJ_H:
 2075 case RISCV::FSGNJ_D_INX:
 2076 case RISCV::FSGNJ_D_IN32X:
 2077 case RISCV::FSGNJ_S_INX:
 2078 case RISCV::FSGNJ_H_INX:
 2079 // The canonical floating-point move is fsgnj rd, rs, rs.
 2080 return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
 2081 MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
 2082 case RISCV::ADDI:
 2083 case RISCV::ORI:
 2084 case RISCV::XORI:
// Cheap when it is "li rd, imm" (source is x0) or a zero-immediate no-op.
 2085 return (MI.getOperand(1).isReg() &&
 2086 MI.getOperand(1).getReg() == RISCV::X0) ||
 2087 (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
 2088 }
 2089 return MI.isAsCheapAsAMove();
 2090}
2091
2092std::optional<DestSourcePair>
2094 if (MI.isMoveReg())
2095 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
2096 switch (MI.getOpcode()) {
2097 default:
2098 break;
2099 case RISCV::ADD:
2100 case RISCV::OR:
2101 case RISCV::XOR:
2102 if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0 &&
2103 MI.getOperand(2).isReg())
2104 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
2105 if (MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0 &&
2106 MI.getOperand(1).isReg())
2107 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
2108 break;
2109 case RISCV::ADDI:
2110 // Operand 1 can be a frameindex but callers expect registers
2111 if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
2112 MI.getOperand(2).getImm() == 0)
2113 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
2114 break;
2115 case RISCV::SUB:
2116 if (MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0 &&
2117 MI.getOperand(1).isReg())
2118 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
2119 break;
2120 case RISCV::SH1ADD:
2121 case RISCV::SH1ADD_UW:
2122 case RISCV::SH2ADD:
2123 case RISCV::SH2ADD_UW:
2124 case RISCV::SH3ADD:
2125 case RISCV::SH3ADD_UW:
2126 if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0 &&
2127 MI.getOperand(2).isReg())
2128 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
2129 break;
2130 case RISCV::FSGNJ_D:
2131 case RISCV::FSGNJ_S:
2132 case RISCV::FSGNJ_H:
2133 case RISCV::FSGNJ_D_INX:
2134 case RISCV::FSGNJ_D_IN32X:
2135 case RISCV::FSGNJ_S_INX:
2136 case RISCV::FSGNJ_H_INX:
2137 // The canonical floating-point move is fsgnj rd, rs, rs.
2138 if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
2139 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
2140 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
2141 break;
2142 }
2143 return std::nullopt;
2144}
2145
2147 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
2148 // The option is unused. Choose Local strategy only for in-order cores. When
2149 // scheduling model is unspecified, use MinInstrCount strategy as more
2150 // generic one.
2151 const auto &SchedModel = STI.getSchedModel();
2152 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
2155 }
2156 // The strategy was forced by the option.
2158}
2159
2161 MachineInstr &Root, unsigned &Pattern,
2162 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
2163 int16_t FrmOpIdx =
2164 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
2165 if (FrmOpIdx < 0) {
2166 assert(all_of(InsInstrs,
2167 [](MachineInstr *MI) {
2168 return RISCV::getNamedOperandIdx(MI->getOpcode(),
2169 RISCV::OpName::frm) < 0;
2170 }) &&
2171 "New instructions require FRM whereas the old one does not have it");
2172 return;
2173 }
2174
2175 const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
2176 MachineFunction &MF = *Root.getMF();
2177
2178 for (auto *NewMI : InsInstrs) {
2179 // We'd already added the FRM operand.
2180 if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
2181 NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands())
2182 continue;
2183 MachineInstrBuilder MIB(MF, NewMI);
2184 MIB.add(FRM);
2185 if (FRM.getImm() == RISCVFPRndMode::DYN)
2186 MIB.addUse(RISCV::FRM, RegState::Implicit);
2187 }
2188}
2189
2190static bool isFADD(unsigned Opc) {
2191 switch (Opc) {
2192 default:
2193 return false;
2194 case RISCV::FADD_H:
2195 case RISCV::FADD_S:
2196 case RISCV::FADD_D:
2197 return true;
2198 }
2199}
2200
2201static bool isFSUB(unsigned Opc) {
2202 switch (Opc) {
2203 default:
2204 return false;
2205 case RISCV::FSUB_H:
2206 case RISCV::FSUB_S:
2207 case RISCV::FSUB_D:
2208 return true;
2209 }
2210}
2211
2212static bool isFMUL(unsigned Opc) {
2213 switch (Opc) {
2214 default:
2215 return false;
2216 case RISCV::FMUL_H:
2217 case RISCV::FMUL_S:
2218 case RISCV::FMUL_D:
2219 return true;
2220 }
2221}
2222
// Returns true if \p Inst is one of the vector pseudos the machine combiner
// may reassociate and commute: VADD_VV and VMUL_VV across every LMUL variant,
// masked or unmasked. When \p Invert is set, the check is applied to the
// inverse opcode of \p Inst instead (as provided by getInverseOpcode).
bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst,
                                                       bool Invert) const {
// Expands to the case labels for one pseudo across every LMUL variant.
#define OPCODE_LMUL_CASE(OPC)                                                  \
  case RISCV::OPC##_M1:                                                        \
  case RISCV::OPC##_M2:                                                        \
  case RISCV::OPC##_M4:                                                        \
  case RISCV::OPC##_M8:                                                        \
  case RISCV::OPC##_MF2:                                                       \
  case RISCV::OPC##_MF4:                                                       \
  case RISCV::OPC##_MF8

// Same expansion for the _MASK variants of the pseudo.
#define OPCODE_LMUL_MASK_CASE(OPC)                                             \
  case RISCV::OPC##_M1_MASK:                                                   \
  case RISCV::OPC##_M2_MASK:                                                   \
  case RISCV::OPC##_M4_MASK:                                                   \
  case RISCV::OPC##_M8_MASK:                                                   \
  case RISCV::OPC##_MF2_MASK:                                                  \
  case RISCV::OPC##_MF4_MASK:                                                  \
  case RISCV::OPC##_MF8_MASK

  unsigned Opcode = Inst.getOpcode();
  if (Invert) {
    // Substitute the inverse opcode; no inverse means not reassociable.
    if (auto InvOpcode = getInverseOpcode(Opcode))
      Opcode = *InvOpcode;
    else
      return false;
  }

  // clang-format off
  switch (Opcode) {
  default:
    return false;
  OPCODE_LMUL_CASE(PseudoVADD_VV):
  OPCODE_LMUL_MASK_CASE(PseudoVADD_VV):
  OPCODE_LMUL_CASE(PseudoVMUL_VV):
  OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV):
    return true;
  }
  // clang-format on

#undef OPCODE_LMUL_MASK_CASE
#undef OPCODE_LMUL_CASE
}
2266
2267bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root,
2268 const MachineInstr &Prev) const {
2269 if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()))
2270 return false;
2271
2272 assert(Root.getMF() == Prev.getMF());
2273 const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo();
2274 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
2275
2276 // Make sure vtype operands are also the same.
2277 const MCInstrDesc &Desc = get(Root.getOpcode());
2278 const uint64_t TSFlags = Desc.TSFlags;
2279
2280 auto checkImmOperand = [&](unsigned OpIdx) {
2281 return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm();
2282 };
2283
2284 auto checkRegOperand = [&](unsigned OpIdx) {
2285 return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg();
2286 };
2287
2288 // PassThru
2289 // TODO: Potentially we can loosen the condition to consider Root to be
2290 // associable with Prev if Root has NoReg as passthru. In which case we
2291 // also need to loosen the condition on vector policies between these.
2292 if (!checkRegOperand(1))
2293 return false;
2294
2295 // SEW
2296 if (RISCVII::hasSEWOp(TSFlags) &&
2297 !checkImmOperand(RISCVII::getSEWOpNum(Desc)))
2298 return false;
2299
2300 // Mask
2301 if (RISCVII::usesMaskPolicy(TSFlags)) {
2302 const MachineBasicBlock *MBB = Root.getParent();
2305 Register MI1VReg;
2306
2307 bool SeenMI2 = false;
2308 for (auto End = MBB->rend(), It = It1; It != End; ++It) {
2309 if (It == It2) {
2310 SeenMI2 = true;
2311 if (!MI1VReg.isValid())
2312 // There is no V0 def between Root and Prev; they're sharing the
2313 // same V0.
2314 break;
2315 }
2316
2317 if (It->modifiesRegister(RISCV::V0, TRI)) {
2318 Register SrcReg = It->getOperand(1).getReg();
2319 // If it's not VReg it'll be more difficult to track its defs, so
2320 // bailing out here just to be safe.
2321 if (!SrcReg.isVirtual())
2322 return false;
2323
2324 if (!MI1VReg.isValid()) {
2325 // This is the V0 def for Root.
2326 MI1VReg = SrcReg;
2327 continue;
2328 }
2329
2330 // Some random mask updates.
2331 if (!SeenMI2)
2332 continue;
2333
2334 // This is the V0 def for Prev; check if it's the same as that of
2335 // Root.
2336 if (MI1VReg != SrcReg)
2337 return false;
2338 else
2339 break;
2340 }
2341 }
2342
2343 // If we haven't encountered Prev, it's likely that this function was
2344 // called in a wrong way (e.g. Root is before Prev).
2345 assert(SeenMI2 && "Prev is expected to appear before Root");
2346 }
2347
2348 // Tail / Mask policies
2349 if (RISCVII::hasVecPolicyOp(TSFlags) &&
2350 !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))
2351 return false;
2352
2353 // VL
2354 if (RISCVII::hasVLOp(TSFlags)) {
2355 unsigned OpIdx = RISCVII::getVLOpNum(Desc);
2356 const MachineOperand &Op1 = Root.getOperand(OpIdx);
2357 const MachineOperand &Op2 = Prev.getOperand(OpIdx);
2358 if (Op1.getType() != Op2.getType())
2359 return false;
2360 switch (Op1.getType()) {
2362 if (Op1.getReg() != Op2.getReg())
2363 return false;
2364 break;
2366 if (Op1.getImm() != Op2.getImm())
2367 return false;
2368 break;
2369 default:
2370 llvm_unreachable("Unrecognized VL operand type");
2371 }
2372 }
2373
2374 // Rounding modes
2375 if (int Idx = RISCVII::getFRMOpNum(Desc); Idx >= 0 && !checkImmOperand(Idx))
2376 return false;
2377 if (int Idx = RISCVII::getVXRMOpNum(Desc); Idx >= 0 && !checkImmOperand(Idx))
2378 return false;
2379
2380 return true;
2381}
2382
2383// Most of our RVV pseudos have passthru operand, so the real operands
2384// start from index = 2.
2385bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,
2386 bool &Commuted) const {
2387 const MachineBasicBlock *MBB = Inst.getParent();
2388 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2390 "Expect the present of passthrough operand.");
2391 MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
2392 MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg());
2393
2394 // If only one operand has the same or inverse opcode and it's the second
2395 // source operand, the operands must be commuted.
2396 Commuted = !areRVVInstsReassociable(Inst, *MI1) &&
2397 areRVVInstsReassociable(Inst, *MI2);
2398 if (Commuted)
2399 std::swap(MI1, MI2);
2400
2401 return areRVVInstsReassociable(Inst, *MI1) &&
2402 (isVectorAssociativeAndCommutative(*MI1) ||
2403 isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
2405 MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
2406}
2407
2409 const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
2410 if (!isVectorAssociativeAndCommutative(Inst) &&
2411 !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
2413
2414 const MachineOperand &Op1 = Inst.getOperand(2);
2415 const MachineOperand &Op2 = Inst.getOperand(3);
2416 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2417
2418 // We need virtual register definitions for the operands that we will
2419 // reassociate.
2420 MachineInstr *MI1 = nullptr;
2421 MachineInstr *MI2 = nullptr;
2422 if (Op1.isReg() && Op1.getReg().isVirtual())
2423 MI1 = MRI.getUniqueVRegDef(Op1.getReg());
2424 if (Op2.isReg() && Op2.getReg().isVirtual())
2425 MI2 = MRI.getUniqueVRegDef(Op2.getReg());
2426
2427 // And at least one operand must be defined in MBB.
2428 return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
2429}
2430
2432 const MachineInstr &Root, unsigned Pattern,
2433 std::array<unsigned, 5> &OperandIndices) const {
2435 if (RISCV::getRVVMCOpcode(Root.getOpcode())) {
2436 // Skip the passthrough operand, so increment all indices by one.
2437 for (unsigned I = 0; I < 5; ++I)
2438 ++OperandIndices[I];
2439 }
2440}
2441
2443 bool &Commuted) const {
2444 if (isVectorAssociativeAndCommutative(Inst) ||
2445 isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
2446 return hasReassociableVectorSibling(Inst, Commuted);
2447
2448 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
2449 return false;
2450
2451 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
2452 unsigned OperandIdx = Commuted ? 2 : 1;
2453 const MachineInstr &Sibling =
2454 *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());
2455
2456 int16_t InstFrmOpIdx =
2457 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
2458 int16_t SiblingFrmOpIdx =
2459 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
2460
2461 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
2462 RISCV::hasEqualFRM(Inst, Sibling);
2463}
2464
2466 bool Invert) const {
2467 if (isVectorAssociativeAndCommutative(Inst, Invert))
2468 return true;
2469
2470 unsigned Opc = Inst.getOpcode();
2471 if (Invert) {
2472 auto InverseOpcode = getInverseOpcode(Opc);
2473 if (!InverseOpcode)
2474 return false;
2475 Opc = *InverseOpcode;
2476 }
2477
2478 if (isFADD(Opc) || isFMUL(Opc))
2481
2482 switch (Opc) {
2483 default:
2484 return false;
2485 case RISCV::ADD:
2486 case RISCV::ADDW:
2487 case RISCV::AND:
2488 case RISCV::OR:
2489 case RISCV::XOR:
2490 // From RISC-V ISA spec, if both the high and low bits of the same product
2491 // are required, then the recommended code sequence is:
2492 //
2493 // MULH[[S]U] rdh, rs1, rs2
2494 // MUL rdl, rs1, rs2
2495 // (source register specifiers must be in same order and rdh cannot be the
2496 // same as rs1 or rs2)
2497 //
2498 // Microarchitectures can then fuse these into a single multiply operation
2499 // instead of performing two separate multiplies.
2500 // MachineCombiner may reassociate MUL operands and lose the fusion
2501 // opportunity.
2502 case RISCV::MUL:
2503 case RISCV::MULW:
2504 case RISCV::MIN:
2505 case RISCV::MINU:
2506 case RISCV::MAX:
2507 case RISCV::MAXU:
2508 case RISCV::FMIN_H:
2509 case RISCV::FMIN_S:
2510 case RISCV::FMIN_D:
2511 case RISCV::FMAX_H:
2512 case RISCV::FMAX_S:
2513 case RISCV::FMAX_D:
2514 return true;
2515 }
2516
2517 return false;
2518}
2519
// Returns the opcode that inverts \p Opcode for reassociation purposes
// (FADD<->FSUB per precision, ADD<->SUB, ADDW<->SUBW, and VADD_VV<->VSUB_VV
// across every LMUL/mask variant), or std::nullopt when no inverse is known.
std::optional<unsigned>
RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
// Expands to the OPC->INV mapping for every LMUL variant of a vector pseudo.
#define RVV_OPC_LMUL_CASE(OPC, INV)                                            \
  case RISCV::OPC##_M1:                                                        \
    return RISCV::INV##_M1;                                                    \
  case RISCV::OPC##_M2:                                                        \
    return RISCV::INV##_M2;                                                    \
  case RISCV::OPC##_M4:                                                        \
    return RISCV::INV##_M4;                                                    \
  case RISCV::OPC##_M8:                                                        \
    return RISCV::INV##_M8;                                                    \
  case RISCV::OPC##_MF2:                                                       \
    return RISCV::INV##_MF2;                                                   \
  case RISCV::OPC##_MF4:                                                       \
    return RISCV::INV##_MF4;                                                   \
  case RISCV::OPC##_MF8:                                                       \
    return RISCV::INV##_MF8

// Same mapping for the _MASK variants.
#define RVV_OPC_LMUL_MASK_CASE(OPC, INV)                                       \
  case RISCV::OPC##_M1_MASK:                                                   \
    return RISCV::INV##_M1_MASK;                                               \
  case RISCV::OPC##_M2_MASK:                                                   \
    return RISCV::INV##_M2_MASK;                                               \
  case RISCV::OPC##_M4_MASK:                                                   \
    return RISCV::INV##_M4_MASK;                                               \
  case RISCV::OPC##_M8_MASK:                                                   \
    return RISCV::INV##_M8_MASK;                                               \
  case RISCV::OPC##_MF2_MASK:                                                  \
    return RISCV::INV##_MF2_MASK;                                              \
  case RISCV::OPC##_MF4_MASK:                                                  \
    return RISCV::INV##_MF4_MASK;                                              \
  case RISCV::OPC##_MF8_MASK:                                                  \
    return RISCV::INV##_MF8_MASK

  switch (Opcode) {
  default:
    return std::nullopt;
  case RISCV::FADD_H:
    return RISCV::FSUB_H;
  case RISCV::FADD_S:
    return RISCV::FSUB_S;
  case RISCV::FADD_D:
    return RISCV::FSUB_D;
  case RISCV::FSUB_H:
    return RISCV::FADD_H;
  case RISCV::FSUB_S:
    return RISCV::FADD_S;
  case RISCV::FSUB_D:
    return RISCV::FADD_D;
  case RISCV::ADD:
    return RISCV::SUB;
  case RISCV::SUB:
    return RISCV::ADD;
  case RISCV::ADDW:
    return RISCV::SUBW;
  case RISCV::SUBW:
    return RISCV::ADDW;
  // clang-format off
  RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV);
  RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV);
  RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV);
  RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV);
  // clang-format on
  }

#undef RVV_OPC_LMUL_MASK_CASE
#undef RVV_OPC_LMUL_CASE
}
2588
2590 const MachineOperand &MO,
2591 bool DoRegPressureReduce) {
2592 if (!MO.isReg() || !MO.getReg().isVirtual())
2593 return false;
2594 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2595 MachineInstr *MI = MRI.getVRegDef(MO.getReg());
2596 if (!MI || !isFMUL(MI->getOpcode()))
2597 return false;
2598
2601 return false;
2602
2603 // Try combining even if fmul has more than one use as it eliminates
2604 // dependency between fadd(fsub) and fmul. However, it can extend liveranges
2605 // for fmul operands, so reject the transformation in register pressure
2606 // reduction mode.
2607 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2608 return false;
2609
2610 // Do not combine instructions from different basic blocks.
2611 if (Root.getParent() != MI->getParent())
2612 return false;
2613 return RISCV::hasEqualFRM(Root, *MI);
2614}
2615
2617 SmallVectorImpl<unsigned> &Patterns,
2618 bool DoRegPressureReduce) {
2619 unsigned Opc = Root.getOpcode();
2620 bool IsFAdd = isFADD(Opc);
2621 if (!IsFAdd && !isFSUB(Opc))
2622 return false;
2623 bool Added = false;
2624 if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
2625 DoRegPressureReduce)) {
2628 Added = true;
2629 }
2630 if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
2631 DoRegPressureReduce)) {
2634 Added = true;
2635 }
2636 return Added;
2637}
2638
2639static bool getFPPatterns(MachineInstr &Root,
2640 SmallVectorImpl<unsigned> &Patterns,
2641 bool DoRegPressureReduce) {
2642 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
2643}
2644
2645/// Utility routine that checks if \param MO is defined by an
2646/// \param CombineOpc instruction in the basic block \param MBB
2648 const MachineOperand &MO,
2649 unsigned CombineOpc) {
2650 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2651 const MachineInstr *MI = nullptr;
2652
2653 if (MO.isReg() && MO.getReg().isVirtual())
2654 MI = MRI.getUniqueVRegDef(MO.getReg());
2655 // And it needs to be in the trace (otherwise, it won't have a depth).
2656 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
2657 return nullptr;
2658 // Must only used by the user we combine with.
2659 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2660 return nullptr;
2661
2662 return MI;
2663}
2664
2665/// Utility routine that checks if \param MO is defined by a SLLI in \param
2666/// MBB that can be combined by splitting across 2 SHXADD instructions. The
2667/// first SHXADD shift amount is given by \param OuterShiftAmt.
2669 const MachineOperand &MO,
2670 unsigned OuterShiftAmt) {
2671 const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
2672 if (!ShiftMI)
2673 return false;
2674
2675 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
2676 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
2677 return false;
2678
2679 return true;
2680}
2681
2682// Returns the shift amount from a SHXADD instruction. Returns 0 if the
2683// instruction is not a SHXADD.
2684static unsigned getSHXADDShiftAmount(unsigned Opc) {
2685 switch (Opc) {
2686 default:
2687 return 0;
2688 case RISCV::SH1ADD:
2689 return 1;
2690 case RISCV::SH2ADD:
2691 return 2;
2692 case RISCV::SH3ADD:
2693 return 3;
2694 }
2695}
2696
2697// Returns the shift amount from a SHXADD.UW instruction. Returns 0 if the
2698// instruction is not a SHXADD.UW.
2699static unsigned getSHXADDUWShiftAmount(unsigned Opc) {
2700 switch (Opc) {
2701 default:
2702 return 0;
2703 case RISCV::SH1ADD_UW:
2704 return 1;
2705 case RISCV::SH2ADD_UW:
2706 return 2;
2707 case RISCV::SH3ADD_UW:
2708 return 3;
2709 }
2710}
2711
2712// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
2713// (sh3add (sh2add Y, Z), X).
2714static bool getSHXADDPatterns(const MachineInstr &Root,
2715 SmallVectorImpl<unsigned> &Patterns) {
2716 unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
2717 if (!ShiftAmt)
2718 return false;
2719
2720 const MachineBasicBlock &MBB = *Root.getParent();
2721
2722 const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
2723 if (!AddMI)
2724 return false;
2725
2726 bool Found = false;
2727 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {
2729 Found = true;
2730 }
2731 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {
2733 Found = true;
2734 }
2735
2736 return Found;
2737}
2738
2750
2752 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
2753 bool DoRegPressureReduce) const {
2754
2755 if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
2756 return true;
2757
2758 if (getSHXADDPatterns(Root, Patterns))
2759 return true;
2760
2761 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
2762 DoRegPressureReduce);
2763}
2764
2765static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
2766 switch (RootOpc) {
2767 default:
2768 llvm_unreachable("Unexpected opcode");
2769 case RISCV::FADD_H:
2770 return RISCV::FMADD_H;
2771 case RISCV::FADD_S:
2772 return RISCV::FMADD_S;
2773 case RISCV::FADD_D:
2774 return RISCV::FMADD_D;
2775 case RISCV::FSUB_H:
2776 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
2777 : RISCV::FNMSUB_H;
2778 case RISCV::FSUB_S:
2779 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
2780 : RISCV::FNMSUB_S;
2781 case RISCV::FSUB_D:
2782 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
2783 : RISCV::FNMSUB_D;
2784 }
2785}
2786
2787static unsigned getAddendOperandIdx(unsigned Pattern) {
2788 switch (Pattern) {
2789 default:
2790 llvm_unreachable("Unexpected pattern");
2793 return 2;
2796 return 1;
2797 }
2798}
2799
2801 unsigned Pattern,
2804 MachineFunction *MF = Root.getMF();
2807
2808 MachineOperand &Mul1 = Prev.getOperand(1);
2809 MachineOperand &Mul2 = Prev.getOperand(2);
2810 MachineOperand &Dst = Root.getOperand(0);
2812
2813 Register DstReg = Dst.getReg();
2814 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
2815 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
2816 DebugLoc MergedLoc =
2818
2819 bool Mul1IsKill = Mul1.isKill();
2820 bool Mul2IsKill = Mul2.isKill();
2821 bool AddendIsKill = Addend.isKill();
2822
2823 // We need to clear kill flags since we may be extending the live range past
2824 // a kill. If the mul had kill flags, we can preserve those since we know
2825 // where the previous range stopped.
2826 MRI.clearKillFlags(Mul1.getReg());
2827 MRI.clearKillFlags(Mul2.getReg());
2828
2830 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
2831 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
2832 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
2833 .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
2834 .setMIFlags(IntersectedFlags);
2835
2836 InsInstrs.push_back(MIB);
2837 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
2838 DelInstrs.push_back(&Prev);
2839 DelInstrs.push_back(&Root);
2840}
2841
2842// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
2843// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
2844// shXadd instructions. The outer shXadd keeps its original opcode.
2845static void
2846genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
2849 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
2850 MachineFunction *MF = Root.getMF();
2853
2854 unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
2855 assert(OuterShiftAmt != 0 && "Unexpected opcode");
2856
2857 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
2858 MachineInstr *ShiftMI =
2859 MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());
2860
2861 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
2862 assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
2863
2864 unsigned InnerOpc;
2865 switch (InnerShiftAmt - OuterShiftAmt) {
2866 default:
2867 llvm_unreachable("Unexpected shift amount");
2868 case 0:
2869 InnerOpc = RISCV::ADD;
2870 break;
2871 case 1:
2872 InnerOpc = RISCV::SH1ADD;
2873 break;
2874 case 2:
2875 InnerOpc = RISCV::SH2ADD;
2876 break;
2877 case 3:
2878 InnerOpc = RISCV::SH3ADD;
2879 break;
2880 }
2881
2882 const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx);
2883 const MachineOperand &Y = ShiftMI->getOperand(1);
2884 const MachineOperand &Z = Root.getOperand(1);
2885
2886 Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2887
2888 auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR)
2889 .addReg(Y.getReg(), getKillRegState(Y.isKill()))
2890 .addReg(Z.getReg(), getKillRegState(Z.isKill()));
2891 auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()),
2892 Root.getOperand(0).getReg())
2893 .addReg(NewVR, RegState::Kill)
2894 .addReg(X.getReg(), getKillRegState(X.isKill()));
2895
2896 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2897 InsInstrs.push_back(MIB1);
2898 InsInstrs.push_back(MIB2);
2899 DelInstrs.push_back(ShiftMI);
2900 DelInstrs.push_back(AddMI);
2901 DelInstrs.push_back(&Root);
2902}
2903
2905 MachineInstr &Root, unsigned Pattern,
2908 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
2910 switch (Pattern) {
2911 default:
2913 DelInstrs, InstrIdxForVirtReg);
2914 return;
2917 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());
2918 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2919 return;
2920 }
2923 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());
2924 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2925 return;
2926 }
2928 genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2929 return;
2931 genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2932 return;
2933 }
2934}
2935
2937 StringRef &ErrInfo) const {
2938 MCInstrDesc const &Desc = MI.getDesc();
2939
2940 for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
2941 const MachineOperand &MO = MI.getOperand(Index);
2942 unsigned OpType = Operand.OperandType;
2943 switch (OpType) {
2944 default:
2945 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
2947 if (!MO.isImm()) {
2948 ErrInfo = "Expected an immediate operand.";
2949 return false;
2950 }
2951 int64_t Imm = MO.getImm();
2952 bool Ok;
2953 switch (OpType) {
2954 default:
2955 llvm_unreachable("Unexpected operand type");
2956
2957#define CASE_OPERAND_UIMM(NUM) \
2958 case RISCVOp::OPERAND_UIMM##NUM: \
2959 Ok = isUInt<NUM>(Imm); \
2960 break;
2961#define CASE_OPERAND_UIMM_LSB_ZEROS(BITS, SUFFIX) \
2962 case RISCVOp::OPERAND_UIMM##BITS##_LSB##SUFFIX: { \
2963 constexpr size_t NumZeros = sizeof(#SUFFIX) - 1; \
2964 Ok = isShiftedUInt<BITS - NumZeros, NumZeros>(Imm); \
2965 break; \
2966 }
2967#define CASE_OPERAND_SIMM(NUM) \
2968 case RISCVOp::OPERAND_SIMM##NUM: \
2969 Ok = isInt<NUM>(Imm); \
2970 break;
2971 // clang-format off
2995 // clang-format on
2997 Ok = isUInt<5>(Imm) && (Imm != 0);
2998 break;
3000 Ok = isUInt<5>(Imm) && (Imm > 3);
3001 break;
3003 Ok = Imm >= 1 && Imm <= 32;
3004 break;
3006 Ok = isUInt<8>(Imm) && Imm >= 32;
3007 break;
3009 Ok = isInt<8>(Imm);
3010 break;
3012 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
3013 break;
3015 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);
3016 break;
3018 Ok = isUInt<16>(Imm) && (Imm != 0);
3019 break;
3021 Ok = Imm == 3;
3022 break;
3024 Ok = Imm == 4;
3025 break;
3027 Ok = (isUInt<5>(Imm) && Imm != 0) || Imm == -1;
3028 break;
3029 // clang-format off
3035 // clang-format on
3037 Ok = Imm >= -15 && Imm <= 16;
3038 break;
3040 Ok = isInt<5>(Imm) && (Imm != 0);
3041 break;
3043 Ok = Imm != 0 && isInt<6>(Imm);
3044 break;
3046 Ok = isUInt<10>(Imm);
3047 break;
3049 Ok = isUInt<11>(Imm);
3050 break;
3052 Ok = isShiftedInt<7, 5>(Imm);
3053 break;
3055 Ok = isInt<16>(Imm) && (Imm != 0);
3056 break;
3058 Ok = isInt<20>(Imm);
3059 break;
3061 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
3062 break;
3064 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
3065 Ok = Ok && Imm != 0;
3066 break;
3068 Ok = (isUInt<5>(Imm) && Imm != 0) || (Imm >= 0xfffe0 && Imm <= 0xfffff);
3069 break;
3071 Ok = Imm >= 0 && Imm <= 10;
3072 break;
3074 Ok = Imm >= 0 && Imm <= 7;
3075 break;
3077 Ok = Imm >= 1 && Imm <= 10;
3078 break;
3080 Ok = Imm >= 2 && Imm <= 14;
3081 break;
3083 Ok = Imm >= RISCVZC::RA && Imm <= RISCVZC::RA_S0_S11;
3084 break;
3086 Ok = Imm >= RISCVZC::RA_S0 && Imm <= RISCVZC::RA_S0_S11;
3087 break;
3089 Ok = Imm >= 0 && Imm <= 48 && Imm % 16 == 0;
3090 break;
3093 break;
3095 Ok = Imm == RISCVFPRndMode::RTZ;
3096 break;
3098 Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID;
3099 break;
3101 Ok = isValidAtomicOrdering(Imm);
3102 break;
3105 Imm;
3106 break;
3108 Ok = (isUInt<5>(Imm) && RISCVVType::isValidSEW(1 << Imm));
3109 break;
3111 Ok = Imm == 0;
3112 break;
3115 if (RISCVII::usesVXRM(Desc.TSFlags))
3116 Ok = isUInt<2>(Imm);
3117 else
3119 break;
3122 break;
3124 Ok = Imm == 1 || Imm == 2 || Imm == 4;
3125 break;
3126 }
3127 if (!Ok) {
3128 ErrInfo = "Invalid immediate";
3129 return false;
3130 }
3131 }
3132 break;
3134 // TODO: We could be stricter about what non-register operands are
3135 // allowed.
3136 if (MO.isReg()) {
3137 ErrInfo = "Expected a non-register operand.";
3138 return false;
3139 }
3140 if (MO.isImm() && !isInt<12>(MO.getImm())) {
3141 ErrInfo = "Invalid immediate";
3142 return false;
3143 }
3144 break;
3147 // TODO: We could be stricter about what non-register operands are
3148 // allowed.
3149 if (MO.isReg()) {
3150 ErrInfo = "Expected a non-register operand.";
3151 return false;
3152 }
3153 if (MO.isImm() && !isUInt<20>(MO.getImm())) {
3154 ErrInfo = "Invalid immediate";
3155 return false;
3156 }
3157 break;
3159 // TODO: We could be stricter about what non-register operands are
3160 // allowed.
3161 if (MO.isReg()) {
3162 ErrInfo = "Expected a non-register operand.";
3163 return false;
3164 }
3165 if (MO.isImm() && !isInt<32>(MO.getImm())) {
3166 ErrInfo = "Invalid immediate";
3167 return false;
3168 }
3169 break;
3171 if (MO.isImm()) {
3172 int64_t Imm = MO.getImm();
3173 // VLMAX is represented as -1.
3174 if (!isUInt<5>(Imm) && Imm != -1) {
3175 ErrInfo = "Invalid immediate";
3176 return false;
3177 }
3178 } else if (!MO.isReg()) {
3179 ErrInfo = "Expected a register or immediate operand.";
3180 return false;
3181 }
3182 break;
3183 }
3184 }
3185
3186 const uint64_t TSFlags = Desc.TSFlags;
3187 if (RISCVII::hasVLOp(TSFlags)) {
3188 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
3189 if (!Op.isImm() && !Op.isReg()) {
3190 ErrInfo = "Invalid operand type for VL operand";
3191 return false;
3192 }
3193 if (Op.isReg() && Op.getReg().isValid()) {
3194 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3195 auto *RC = MRI.getRegClass(Op.getReg());
3196 if (!RISCV::GPRNoX0RegClass.hasSubClassEq(RC)) {
3197 ErrInfo = "Invalid register class for VL operand";
3198 return false;
3199 }
3200 }
3201 if (!RISCVII::hasSEWOp(TSFlags)) {
3202 ErrInfo = "VL operand w/o SEW operand?";
3203 return false;
3204 }
3205 }
3206 if (RISCVII::hasSEWOp(TSFlags)) {
3207 unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
3208 if (!MI.getOperand(OpIdx).isImm()) {
3209 ErrInfo = "SEW value expected to be an immediate";
3210 return false;
3211 }
3212 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
3213 if (Log2SEW > 31) {
3214 ErrInfo = "Unexpected SEW value";
3215 return false;
3216 }
3217 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
3218 if (!RISCVVType::isValidSEW(SEW)) {
3219 ErrInfo = "Unexpected SEW value";
3220 return false;
3221 }
3222 }
3223 if (RISCVII::hasVecPolicyOp(TSFlags)) {
3225 if (!MI.getOperand(OpIdx).isImm()) {
3226 ErrInfo = "Policy operand expected to be an immediate";
3227 return false;
3228 }
3229 uint64_t Policy = MI.getOperand(OpIdx).getImm();
3231 ErrInfo = "Invalid Policy Value";
3232 return false;
3233 }
3234 if (!RISCVII::hasVLOp(TSFlags)) {
3235 ErrInfo = "policy operand w/o VL operand?";
3236 return false;
3237 }
3238
3239 // VecPolicy operands can only exist on instructions with passthru/merge
3240 // arguments. Note that not all arguments with passthru have vec policy
3241 // operands- some instructions have implicit policies.
3242 unsigned UseOpIdx;
3243 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
3244 ErrInfo = "policy operand w/o tied operand?";
3245 return false;
3246 }
3247 }
3248
3249 if (int Idx = RISCVII::getFRMOpNum(Desc);
3250 Idx >= 0 && MI.getOperand(Idx).getImm() == RISCVFPRndMode::DYN &&
3251 !MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) {
3252 ErrInfo = "dynamic rounding mode should read FRM";
3253 return false;
3254 }
3255
3256 return true;
3257}
3258
// Decide whether the address-computing ADDI `AddrI` can be folded into the
// scalar load/store `MemI`, producing a new base register + displacement in
// `AM`. Returns false for unsupported opcodes, when the memory instruction's
// operand 0 is the address register itself, or when the combined displacement
// does not fit a signed 12-bit immediate.
// NOTE(review): the opening signature line of this definition (presumably
// RISCVInstrInfo::canFoldIntoAddrMode) is elided from this listing.
3260 const MachineInstr &AddrI,
3261 ExtAddrMode &AM) const {
// Only plain base+offset scalar loads/stores are candidates for folding.
3262 switch (MemI.getOpcode()) {
3263 default:
3264 return false;
3265 case RISCV::LB:
3266 case RISCV::LBU:
3267 case RISCV::LH:
3268 case RISCV::LH_INX:
3269 case RISCV::LHU:
3270 case RISCV::LW:
3271 case RISCV::LW_INX:
3272 case RISCV::LWU:
3273 case RISCV::LD:
3274 case RISCV::LD_RV32:
3275 case RISCV::FLH:
3276 case RISCV::FLW:
3277 case RISCV::FLD:
3278 case RISCV::SB:
3279 case RISCV::SH:
3280 case RISCV::SH_INX:
3281 case RISCV::SW:
3282 case RISCV::SW_INX:
3283 case RISCV::SD:
3284 case RISCV::SD_RV32:
3285 case RISCV::FSH:
3286 case RISCV::FSW:
3287 case RISCV::FSD:
3288 break;
3289 }
3290
// If the memory instruction's value operand is the register being folded,
// folding would change which value is loaded/stored.
3291 if (MemI.getOperand(0).getReg() == Reg)
3292 return false;
3293
// Only fold a canonical reg+imm ADDI.
3294 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
3295 !AddrI.getOperand(2).isImm())
3296 return false;
3297
3298 int64_t OldOffset = MemI.getOperand(2).getImm();
3299 int64_t Disp = AddrI.getOperand(2).getImm();
3300 int64_t NewOffset = OldOffset + Disp;
// On RV32 address arithmetic wraps at 32 bits, so normalize the sum.
3301 if (!STI.is64Bit())
3302 NewOffset = SignExtend64<32>(NewOffset);
3303
// The I-type immediate field is 12 bits, signed.
3304 if (!isInt<12>(NewOffset))
3305 return false;
3306
3307 AM.BaseReg = AddrI.getOperand(1).getReg();
3308 AM.ScaledReg = 0;
3309 AM.Scale = 0;
3310 AM.Displacement = NewOffset;
// NOTE(review): one line (original 3311) appears elided here — verify
// against upstream before relying on this listing.
3312 return true;
3313}
3314
// Re-emit the load/store `MemI` using the folded addressing mode `AM`
// (base register + displacement) computed by canFoldIntoAddrMode, keeping the
// original opcode, memory operands and MI flags.
// NOTE(review): the opening signature line of this definition (presumably
// RISCVInstrInfo::emitLdStWithAddr) is elided from this listing.
3316 const ExtAddrMode &AM) const {
3317
3318 const DebugLoc &DL = MemI.getDebugLoc();
3319 MachineBasicBlock &MBB = *MemI.getParent();
3320
// Scaled-register forms are never produced for RISC-V by the folding code.
3321 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3322 "Addressing mode not supported for folding");
3323
// Operand 0 is a def for loads and a use for stores; getDefRegState picks
// the right register state from mayLoad().
3324 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
3325 .addReg(MemI.getOperand(0).getReg(), getDefRegState(MemI.mayLoad()))
3326 .addReg(AM.BaseReg)
3327 .addImm(AM.Displacement)
3328 .setMemRefs(MemI.memoperands())
3329 .setMIFlags(MemI.getFlags());
3330}
3331
3332// TODO: At the moment, MIPS introduced pairing of instructions operating with
3333// word or double word. This should be extended with more instructions when more
3334// vendors support load/store pairing.
// Returns true for the word/double-word load/store opcodes that are eligible
// for load/store pairing.
// NOTE(review): the signature line of this helper (presumably
// static bool isPairableLdStInstOpc(unsigned Opc)) is elided from this listing.
3336 switch (Opc) {
3337 default:
3338 return false;
3339 case RISCV::SW:
3340 case RISCV::SD:
3341 case RISCV::LD:
3342 case RISCV::LW:
3343 return true;
3344 }
3345}
3346
// Conservative legality check for clustering a load/store: rejects ordered
// (e.g. volatile/atomic) accesses, non-canonical operand counts, update-form
// accesses that modify their own base register, and non-immediate offsets.
// NOTE(review): the signature line of this helper (presumably
// static bool isLdStSafeToCluster(const MachineInstr &LdSt, ...)) is elided
// from this listing.
3348 const TargetRegisterInfo *TRI) {
3349 // If this is a volatile load/store, don't mess with it.
3350 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
3351 return false;
3352
// Frame-index bases are always safe: they cannot be modified by the access.
3353 if (LdSt.getOperand(1).isFI())
3354 return true;
3355
3356 assert(LdSt.getOperand(1).isReg() && "Expected a reg operand.");
3357 // Can't cluster if the instruction modifies the base register
3358 // or it is update form. e.g. ld x5,8(x5)
3359 if (LdSt.modifiesRegister(LdSt.getOperand(1).getReg(), TRI))
3360 return false;
3361
3362 if (!LdSt.getOperand(2).isImm())
3363 return false;
3364
3365 return true;
3366}
3367
// Collect the base operand, offset and access width of a scalar load/store
// for the machine scheduler's clustering logic. Only the fixed list of scalar
// opcodes below is handled; everything else (including vector accesses) is
// rejected.
// NOTE(review): the opening signature lines of this definition (presumably
// RISCVInstrInfo::getMemOperandsWithOffsetWidth) are elided from this listing.
3370 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3371 const TargetRegisterInfo *TRI) const {
3372 if (!LdSt.mayLoadOrStore())
3373 return false;
3374
3375 // Conservatively, only handle scalar loads/stores for now.
3376 switch (LdSt.getOpcode()) {
3377 case RISCV::LB:
3378 case RISCV::LBU:
3379 case RISCV::SB:
3380 case RISCV::LH:
3381 case RISCV::LH_INX:
3382 case RISCV::LHU:
3383 case RISCV::FLH:
3384 case RISCV::SH:
3385 case RISCV::SH_INX:
3386 case RISCV::FSH:
3387 case RISCV::LW:
3388 case RISCV::LW_INX:
3389 case RISCV::LWU:
3390 case RISCV::FLW:
3391 case RISCV::SW:
3392 case RISCV::SW_INX:
3393 case RISCV::FSW:
3394 case RISCV::LD:
3395 case RISCV::LD_RV32:
3396 case RISCV::FLD:
3397 case RISCV::SD:
3398 case RISCV::SD_RV32:
3399 case RISCV::FSD:
3400 break;
3401 default:
3402 return false;
3403 }
3404 const MachineOperand *BaseOp;
// Scalar accesses always have a fixed (non-vscale-scaled) offset.
3405 OffsetIsScalable = false;
3406 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
3407 return false;
3408 BaseOps.push_back(BaseOp);
3409 return true;
3410}
3411
3412// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
3413// helper?
// Returns true if the two memory instructions are known to share the same
// base pointer, either by identical base operands or by tracing each single
// memory operand's IR value back to the same underlying object.
// NOTE(review): the signature lines of this helper (presumably
// static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef<...>
// BaseOps1, const MachineInstr &MI2, ArrayRef<...> BaseOps2)) are elided
// from this listing.
3416 const MachineInstr &MI2,
3418 // Only examine the first "base" operand of each instruction, on the
3419 // assumption that it represents the real base address of the memory access.
3420 // Other operands are typically offsets or indices from this base address.
3421 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
3422 return true;
3423
3424 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
3425 return false;
3426
3427 auto MO1 = *MI1.memoperands_begin();
3428 auto MO2 = *MI2.memoperands_begin();
3429 if (MO1->getAddrSpace() != MO2->getAddrSpace())
3430 return false;
3431
3432 auto Base1 = MO1->getValue();
3433 auto Base2 = MO2->getValue();
3434 if (!Base1 || !Base2)
3435 return false;
// Strip GEPs/casts to compare the actual allocation each access refers to.
3436 Base1 = getUnderlyingObject(Base1);
3437 Base2 = getUnderlyingObject(Base2);
3438
3439 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
3440 return false;
3441
3442 return Base1 == Base2;
3443}
3444
// Scheduler hook: decide whether two memory operations should be clustered.
// Requires a common base pointer, limits cluster size to 4, and requires the
// offsets to be within one cache line of each other.
// NOTE(review): the opening signature line of this definition (presumably
// RISCVInstrInfo::shouldClusterMemOps) is elided from this listing.
3446 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
3447 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
3448 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
3449 unsigned NumBytes) const {
3450 // If the mem ops (to be clustered) do not have the same base ptr, then they
3451 // should not be clustered
3452 if (!BaseOps1.empty() && !BaseOps2.empty()) {
3453 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
3454 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
3455 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
3456 return false;
3457 } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
3458 // If only one base op is empty, they do not have the same base ptr
3459 return false;
3460 }
3461
3462 unsigned CacheLineSize =
3463 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
3464 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
// NOTE(review): the statement implementing the 64-byte fallback (original
// line 3465) appears elided from this listing — verify against upstream.
3466 // Cluster if the memory operations are on the same or a neighbouring cache
3467 // line, but limit the maximum ClusterSize to avoid creating too much
3468 // additional register pressure.
3469 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize;
3470}
3471
3472// Set BaseReg (the base register operand), Offset (the byte offset being
3473// accessed) and the access Width of the passed instruction that reads/writes
3474// memory. Returns false if the instruction does not read/write memory or the
3475// BaseReg/Offset/Width can't be determined. Is not guaranteed to always
3476// recognise base operands and offsets in all cases.
3477// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
3478// function) and set it as appropriate.
// NOTE(review): the opening signature line of this definition (presumably
// RISCVInstrInfo::getMemOperandWithOffsetWidth) is elided from this listing.
3480 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
3481 LocationSize &Width, const TargetRegisterInfo *TRI) const {
3482 if (!LdSt.mayLoadOrStore())
3483 return false;
3484
3485 // Here we assume the standard RISC-V ISA, which uses a base+offset
3486 // addressing mode. You'll need to relax these conditions to support custom
3487 // load/store instructions.
3488 if (LdSt.getNumExplicitOperands() != 3)
3489 return false;
// Operand 1 must be a register or frame-index base; operand 2 an immediate.
3490 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3491 !LdSt.getOperand(2).isImm())
3492 return false;
3493
// The width is taken from the single memory operand, so require exactly one.
3494 if (!LdSt.hasOneMemOperand())
3495 return false;
3496
3497 Width = (*LdSt.memoperands_begin())->getSize();
3498 BaseReg = &LdSt.getOperand(1);
3499 Offset = LdSt.getOperand(2).getImm();
3500 return true;
3501}
3502
// Alias-analysis hook: two accesses are trivially disjoint when they share an
// identical base operand and their [offset, offset+width) ranges do not
// overlap.
// NOTE(review): the opening signature line of this definition (presumably
// RISCVInstrInfo::areMemAccessesTriviallyDisjoint) is elided from this
// listing.
3504 const MachineInstr &MIa, const MachineInstr &MIb) const {
3505 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
3506 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
3507
// NOTE(review): the condition guarding this early return (original lines
// 3508-3509) appears elided from this listing — verify against upstream.
3510 return false;
3511
3512 // Retrieve the base register, offset from the base register and width. Width
3513 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
3514 // base registers are identical, and the offset of a lower memory access +
3515 // the width doesn't overlap the offset of a higher memory access,
3516 // then the memory accesses are different.
3517 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3518 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
3519 int64_t OffsetA = 0, OffsetB = 0;
// NOTE(review): the declaration of WidthA (original line 3520) appears
// elided from this listing.
3521 WidthB = LocationSize::precise(0);
3522 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
3523 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
3524 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
3525 int LowOffset = std::min(OffsetA, OffsetB);
3526 int HighOffset = std::max(OffsetA, OffsetB);
3527 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
3528 if (LowWidth.hasValue() &&
3529 LowOffset + (int)LowWidth.getValue() <= HighOffset)
3530 return true;
3531 }
3532 }
3533 return false;
3534}
3535
// Split a target-flags word into (direct flags, remaining bitmask flags)
// using MO_DIRECT_FLAG_MASK.
// NOTE(review): the function-name line of this definition (presumably
// RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const)
// is elided from this listing.
3536 std::pair<unsigned, unsigned>
3538 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
3539 return std::make_pair(TF & Mask, TF & ~Mask);
3540 }
3541
// Table mapping each direct operand target flag to its MIR serialization
// name, returned as a stable static ArrayRef.
// NOTE(review): the opening signature lines of this definition (presumably
// RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags) are elided
// from this listing.
3544 using namespace RISCVII;
3545 static const std::pair<unsigned, const char *> TargetFlags[] = {
3546 {MO_CALL, "riscv-call"},
3547 {MO_LO, "riscv-lo"},
3548 {MO_HI, "riscv-hi"},
3549 {MO_PCREL_LO, "riscv-pcrel-lo"},
3550 {MO_PCREL_HI, "riscv-pcrel-hi"},
3551 {MO_GOT_HI, "riscv-got-hi"},
3552 {MO_TPREL_LO, "riscv-tprel-lo"},
3553 {MO_TPREL_HI, "riscv-tprel-hi"},
3554 {MO_TPREL_ADD, "riscv-tprel-add"},
3555 {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
3556 {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
3557 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
3558 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
3559 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
3560 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
3561 return ArrayRef(TargetFlags);
3562}
// Machine-outliner hook: a function is safe to outline from unless the
// linker may deduplicate it (linkonce_odr, unless explicitly allowed) or it
// carries an explicit section assignment.
// NOTE(review): the opening signature line of this definition (presumably
// RISCVInstrInfo::isFunctionSafeToOutlineFrom) is elided from this listing.
3564 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
3565 const Function &F = MF.getFunction();
3566
3567 // Can F be deduplicated by the linker? If it can, don't outline from it.
3568 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
3569 return false;
3570
3571 // Don't outline from functions with section markings; the program could
3572 // expect that all the code is in the named section.
3573 if (F.hasSection())
3574 return false;
3575
3576 // It's safe to outline from MF.
3577 return true;
3578}
3579
// NOTE(review): this appears to be the tail of
// RISCVInstrInfo::isMBBSafeToOutlineFrom; both its opening signature line
// (original 3580) and its return statement (original 3583) are elided from
// this listing — verify against upstream.
3581 unsigned &Flags) const {
3582 // More accurate safety checking is done in getOutliningCandidateInfo.
3584}
3585
3586 // Enum values indicating how an outlined call should be constructed.
// NOTE(review): the enum definition itself appears elided from this listing;
// only its surrounding line-number residue remains below.
3591
3596
// Returns true when the block's parent function requests entry patching
// ("fentry-call" or "patchable-function-entry" attributes).
// NOTE(review): the signature line of this static helper (original 3597) is
// elided from this listing; the name isCandidatePatchable is presumed.
3598 const MachineFunction *MF = MBB.getParent();
3599 const Function &F = MF->getFunction();
3600 return F.getFnAttribute("fentry-call").getValueAsBool() ||
3601 F.hasFnAttribute("patchable-function-entry");
3602 }
3603
// True if MI reads RegNo either explicitly or as an implicit use declared in
// its instruction descriptor.
// NOTE(review): the first signature line of this helper (presumably
// static bool isMIReadsReg(const MachineInstr &MI, const TargetRegisterInfo
// *TRI, ...)) is elided from this listing.
3605 MCRegister RegNo) {
3606 return MI.readsRegister(RegNo, TRI) ||
3607 MI.getDesc().hasImplicitUseOfPhysReg(RegNo);
3608 }
3609
// True if MI writes RegNo either explicitly or as an implicit def declared in
// its instruction descriptor.
// NOTE(review): the first signature line of this helper (presumably
// static bool isMIModifiesReg(const MachineInstr &MI, ...)) is elided from
// this listing.
3611 const TargetRegisterInfo *TRI, MCRegister RegNo) {
3612 return MI.modifiesRegister(RegNo, TRI) ||
3613 MI.getDesc().hasImplicitDefOfPhysReg(RegNo);
3614 }
3615
// Returns true when a tail call cannot be inserted at the end of MBB: the
// block must end in a return, and no instruction before the first write of
// the tail-expansion register may read it.
// NOTE(review): the signature line of this helper (original 3616, presumably
// static bool cannotInsertTailCall(const MachineBasicBlock &MBB)) is elided
// from this listing.
3617 if (!MBB.back().isReturn())
3618 return true;
// NOTE(review): the condition guarding this early return (original line
// 3619) appears elided from this listing — verify against upstream.
3620 return true;
3621
3622 // If the candidate reads the pre-set register
3623 // that can be used for expanding PseudoTAIL instruction,
3624 // then we cannot insert tail call.
3625 const TargetSubtargetInfo &STI = MBB.getParent()->getSubtarget();
3626 MCRegister TailExpandUseRegNo =
// NOTE(review): the initializer of TailExpandUseRegNo (original line 3627)
// appears elided from this listing.
3628 for (const MachineInstr &MI : MBB) {
3629 if (isMIReadsReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo))
3630 return true;
// Once the register is clobbered, earlier values cannot be live afterwards.
3631 if (isMIModifiesReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo))
3632 break;
3633 }
3634 return false;
3635}
3636
// Returns true when outlining candidate C must be rejected: the tail-call
// expansion register or X5 (t0, used to set up the outlined call) is live
// across, or modified within, the candidate.
// NOTE(review): the signature line of this definition (original 3637,
// presumably RISCVInstrInfo::analyzeCandidate(outliner::Candidate &C)) is
// elided from this listing.
3638 // If the expansion register for tail calls is live across the candidate
3639 // outlined call site, we cannot outline that candidate as the expansion
3640 // would clobber the register.
3641 MCRegister TailExpandUseReg =
3642 RISCVII::getTailExpandUseRegNo(STI.getFeatureBits());
3643 if (C.back().isReturn() &&
3644 !C.isAvailableAcrossAndOutOfSeq(TailExpandUseReg, RegInfo)) {
3645 LLVM_DEBUG(dbgs() << "MBB:\n" << *C.getMBB());
3646 LLVM_DEBUG(dbgs() << "Cannot be outlined between: " << C.front() << "and "
3647 << C.back());
3648 LLVM_DEBUG(dbgs() << "Because the tail-call register is live across "
3649 "the proposed outlined function call\n");
3650 return true;
3651 }
3652
3653 // If last instruction is return then we can rely on
3654 // the verification already performed in the getOutliningTypeImpl.
3655 if (C.back().isReturn()) {
3656 assert(!cannotInsertTailCall(*C.getMBB()) &&
3657 "The candidate who uses return instruction must be outlined "
3658 "using tail call");
3659 return false;
3660 }
3661
3662 // Filter out candidates where the X5 register (t0) can't be used to setup
3663 // the function call.
3664 if (llvm::any_of(C, [this](const MachineInstr &MI) {
3665 return isMIModifiesReg(MI, &RegInfo, RISCV::X5);
3666 }))
3667 return true;
3668
3669 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, RegInfo);
3670}
3671
// Machine-outliner hook: prune non-viable candidates, compute call/frame
// overheads (tail call vs. call-through-t0), enforce CFI restrictions, and
// build the OutlinedFunction descriptor.
// NOTE(review): the function-name line of this definition (original 3673) is
// elided from this listing.
3672std::optional<std::unique_ptr<outliner::OutlinedFunction>>
3674 const MachineModuleInfo &MMI,
3675 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
3676 unsigned MinRepeats) const {
3677
3678 // Analyze each candidate and erase the ones that are not viable.
3679 llvm::erase_if(RepeatedSequenceLocs, [this](auto Candidate) {
3680 return analyzeCandidate(Candidate);
3681 });
3682
3683 // If the sequence doesn't have enough candidates left, then we're done.
3684 if (RepeatedSequenceLocs.size() < MinRepeats)
3685 return std::nullopt;
3686
3687 // Each RepeatedSequenceLoc is identical.
3688 outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
// With Zca, the jump/return instructions in the overhead can be compressed.
3689 unsigned InstrSizeCExt =
3690 Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtZca() ? 2 : 4;
3691 unsigned CallOverhead = 0, FrameOverhead = 0;
3692
3693 // Count the number of CFI instructions in the candidate, if present.
3694 unsigned CFICount = 0;
3695 for (auto &I : Candidate) {
3696 if (I.isCFIInstruction())
3697 CFICount++;
3698 }
3699
3700 // Ensure CFI coverage matches: comparing the number of CFIs in the candidate
3701 // with the total number of CFIs in the parent function for each candidate.
3702 // Outlining only a subset of a function's CFIs would split the unwind state
3703 // across two code regions and lead to incorrect address offsets between the
3704 // outlined body and the remaining code. To preserve correct unwind info, we
3705 // only outline when all CFIs in the function can be outlined together.
3706 for (outliner::Candidate &C : RepeatedSequenceLocs) {
3707 std::vector<MCCFIInstruction> CFIInstructions =
3708 C.getMF()->getFrameInstructions();
3709
3710 if (CFICount > 0 && CFICount != CFIInstructions.size())
3711 return std::nullopt;
3712 }
3713
// NOTE(review): the declaration of MOCI and the tail-call assignment
// (original lines 3714 and 3716) appear elided from this listing.
3715 if (Candidate.back().isReturn()) {
3717 // tail call = auipc + jalr in the worst case without linker relaxation.
3718 // FIXME: This code suggests the JALR can be compressed - how?
3719 CallOverhead = 4 + InstrSizeCExt;
3720 // Using tail call we move ret instruction from caller to callee.
3721 FrameOverhead = 0;
3722 } else {
3723 // call t0, function = 8 bytes.
3724 CallOverhead = 8;
3725 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3726 FrameOverhead = InstrSizeCExt;
3727 }
3728
3729 // If we have CFI instructions, we can only outline if the outlined section
3730 // can be a tail call.
3731 if (MOCI != MachineOutlinerTailCall && CFICount > 0)
3732 return std::nullopt;
3733
3734 for (auto &C : RepeatedSequenceLocs)
3735 C.setCallInfo(MOCI, CallOverhead)
3736
3737 unsigned SequenceSize = 0;
3738 for (auto &MI : Candidate)
3739 SequenceSize += getInstSizeInBytes(MI);
3740
3741 return std::make_unique<outliner::OutlinedFunction>(
3742 RepeatedSequenceLocs, SequenceSize, FrameOverhead, MOCI);
3743}
3744
// Per-instruction outlining classification: rejects CFI instructions outside
// tail calls, returns/t0-clobbers where a tail call can't be inserted,
// pcrel-lo operands that could end up in a different section, and landing
// pads.
// NOTE(review): the opening signature lines of this definition (presumably
// RISCVInstrInfo::getOutliningTypeImpl) are elided from this listing, as are
// the `return outliner::InstrType::...` statements after each check
// (original lines 3759, 3763, 3773, 3777, 3779).
3748 unsigned Flags) const {
3749 MachineInstr &MI = *MBBI;
3750 MachineBasicBlock *MBB = MI.getParent();
3751 const TargetRegisterInfo *TRI =
3752 MBB->getParent()->getSubtarget().getRegisterInfo();
3753 const auto &F = MI.getMF()->getFunction();
3754
3755 // We can only outline CFI instructions if we will tail call the outlined
3756 // function, or fix up the CFI offsets. Currently, CFI instructions are
3757 // outlined only if in a tail call.
3758 if (MI.isCFIInstruction())
3760
3761 if (cannotInsertTailCall(*MBB) &&
3762 (MI.isReturn() || isMIModifiesReg(MI, TRI, RISCV::X5)))
3764
3765 // Make sure the operands don't reference something unsafe.
3766 for (const auto &MO : MI.operands()) {
3767
3768 // pcrel-hi and pcrel-lo can't put in separate sections, filter that out
3769 // if any possible.
3770 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
3771 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
3772 F.hasSection() || F.getSectionPrefix()))
3774 }
3775
3776 if (isLPAD(MI))
3778
3780}
3781
// Finish the body of an outlined function: tail-call frames need nothing;
// otherwise append `jalr x0, 0(x5)` (jr t0) to return through the register
// the call sequence set up, and mark X5 live-in.
// NOTE(review): the opening signature lines of this definition (presumably
// RISCVInstrInfo::buildOutlinedFrame) are elided from this listing.
3784 const outliner::OutlinedFunction &OF) const {
3785
3786 if (OF.FrameConstructionID == MachineOutlinerTailCall)
3787 return;
3788
3789 MBB.addLiveIn(RISCV::X5);
3790
3791 // Add in a return instruction to the end of the outlined frame.
3792 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
3793 .addReg(RISCV::X0, RegState::Define)
3794 .addReg(RISCV::X5)
3795 .addImm(0));
3796}
3797
// Insert the call to an outlined function at `It`: a PseudoTAIL for
// tail-call candidates, otherwise a PseudoCALLReg through X5 (t0).
// NOTE(review): the opening signature lines of this definition (presumably
// RISCVInstrInfo::insertOutlinedCall, original lines 3798-3800) and the
// closing `RISCVII::MO_CALL));` of the second BuildMI (original line 3813)
// are elided from this listing.
3801
3802 if (C.CallConstructionID == MachineOutlinerTailCall) {
3803 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
3804 .addGlobalAddress(M.getNamedValue(MF.getName()),
3805 /*Offset=*/0, RISCVII::MO_CALL));
3806 return It;
3807 }
3808
3809 // Add in a call instruction to the outlined function at the given location.
3810 It = MBB.insert(It,
3811 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
3812 .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
3814 return It;
3815}
3816
3817std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
3818 Register Reg) const {
3819 // TODO: Handle cases where Reg is a super- or sub-register of the
3820 // destination register.
3821 const MachineOperand &Op0 = MI.getOperand(0);
3822 if (!Op0.isReg() || Reg != Op0.getReg())
3823 return std::nullopt;
3824
3825 // Don't consider ADDIW as a candidate because the caller may not be aware
3826 // of its sign extension behaviour.
3827 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
3828 MI.getOperand(2).isImm())
3829 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};
3830
3831 return std::nullopt;
3832}
3833
3834// MIR printer helper function to annotate Operands with a comment.
// Produces a human-readable comment for vtype/SEW/policy/VL/rounding-mode
// immediates on vector pseudos; falls back to the generic target comment.
// NOTE(review): the opening signature line of this definition (original
// 3835, presumably RISCVInstrInfo::createMIROperandComment) is elided from
// this listing, as are several `case` labels of the switch below (original
// lines 3856-3857, 3862, 3864, 3867, 3872-3873, 3880, 3882, 3888, 3894,
// 3896, 3900) — verify against upstream.
3836 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
3837 const TargetRegisterInfo *TRI) const {
3838 // Print a generic comment for this operand if there is one.
3839 std::string GenericComment =
3841 if (!GenericComment.empty())
3842 return GenericComment;
3843
3844 const MCInstrDesc &Desc = MI.getDesc();
3845 if (OpIdx >= Desc.getNumOperands())
3846 return std::string();
3847
3848 std::string Comment;
3849 raw_string_ostream OS(Comment);
3850
3851 const MCOperandInfo &OpInfo = Desc.operands()[OpIdx];
3852
3853 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
3854 // operand of vector codegen pseudos.
3855 switch (OpInfo.OperandType) {
3858 unsigned Imm = Op.getImm();
3859 RISCVVType::printVType(Imm, OS);
3860 break;
3861 }
3863 unsigned Imm = Op.getImm();
3865 break;
3866 }
3868 unsigned Imm = Op.getImm();
3869 OS << "w" << Imm;
3870 break;
3871 }
3874 unsigned Log2SEW = Op.getImm();
// A Log2SEW of 0 encodes a mask operation, printed as SEW=8.
3875 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
3876 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
3877 OS << "e" << SEW;
3878 break;
3879 }
3881 unsigned Policy = Op.getImm();
3883 "Invalid Policy Value");
3884 OS << (Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
3885 << (Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu");
3886 break;
3887 }
3889 if (Op.isImm() && Op.getImm() == -1)
3890 OS << "vl=VLMAX";
3891 else
3892 OS << "vl";
3893 break;
3895 if (RISCVII::usesVXRM(Desc.TSFlags)) {
3897 auto VXRM = static_cast<RISCVVXRndMode::RoundingMode>(Op.getImm());
3898 OS << "vxrm=" << RISCVVXRndMode::roundingModeToString(VXRM);
3899 } else {
3901 auto FRM = static_cast<RISCVFPRndMode::RoundingMode>(Op.getImm());
3902 OS << "frm=" << RISCVFPRndMode::roundingModeToString(FRM);
3903 }
3904 break;
3905 }
3906
3907 return Comment;
3908}
3909
3910// clang-format off
// Case-label helpers: expand to every LMUL (and, for VFMA, SEW) variant of an
// RVV pseudo so a single macro use covers the whole opcode family in a switch.
3911#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
3912 RISCV::Pseudo##OP##_##LMUL
3913
3914#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
3915 RISCV::Pseudo##OP##_##LMUL##_MASK
3916
3917#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
3918 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
3919 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
3920
// "WIDEN" variants cover MF8..M4 only (widening ops have no M8 form).
3921#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
3922 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
3923 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
3924 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
3925 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
3926 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
3927 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
3928
3929#define CASE_RVV_OPCODE_UNMASK(OP) \
3930 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
3931 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
3932
3933#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
3934 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
3935 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
3936 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
3937 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
3938 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
3939 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
3940
3941#define CASE_RVV_OPCODE_MASK(OP) \
3942 CASE_RVV_OPCODE_MASK_WIDEN(OP): \
3943 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
3944
3945#define CASE_RVV_OPCODE_WIDEN(OP) \
3946 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
3947 case CASE_RVV_OPCODE_MASK_WIDEN(OP)
3948
3949#define CASE_RVV_OPCODE(OP) \
3950 CASE_RVV_OPCODE_UNMASK(OP): \
3951 case CASE_RVV_OPCODE_MASK(OP)
3952// clang-format on
3953
3954// clang-format off
3955#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
3956 RISCV::PseudoV##OP##_##TYPE##_##LMUL
3957
3958#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
3959 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
3960 case CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
3961 case CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
3962 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
3963 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
3964 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
3965 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
3966
3967// VFMA instructions are SEW specific.
3968#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
3969 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
3970
// The minimum legal LMUL depends on SEW: E64 starts at M1, E32 at MF2,
// E16 at MF4 — hence the three nested expansion levels below.
3971#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
3972 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
3973 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
3974 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
3975 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
3976
3977#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
3978 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
3979 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
3980
3981#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
3982 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
3983 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
3984
3985#define CASE_VFMA_OPCODE_VV(OP) \
3986 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
3987 case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VV, E16): \
3988 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
3989 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
3990
3991#define CASE_VFMA_SPLATS(OP) \
3992 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
3993 case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VFPR16, E16): \
3994 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
3995 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
3996// clang-format on
3997
// Identify a pair of commutable source operands for MI, following the
// TargetInstrInfo::findCommutedOpIndices contract (CommuteAnyOperandIndex
// means "free to choose"). Covers T-Head/Qualcomm conditional moves, plain
// commutative RVV arithmetic, and the vector FMA/MA families where commuting
// may require an opcode change (done later in commuteInstructionImpl).
// NOTE(review): the opening signature line of this definition (original
// 3998, presumably RISCVInstrInfo::findCommutedOpIndices) is elided from
// this listing.
3999 unsigned &SrcOpIdx1,
4000 unsigned &SrcOpIdx2) const {
4001 const MCInstrDesc &Desc = MI.getDesc();
4002 if (!Desc.isCommutable())
4003 return false;
4004
4005 switch (MI.getOpcode()) {
4006 case RISCV::TH_MVEQZ:
4007 case RISCV::TH_MVNEZ:
4008 // We can't commute operands if operand 2 (i.e., rs1 in
4009 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
4010 // not valid as the in/out-operand 1).
4011 if (MI.getOperand(2).getReg() == RISCV::X0)
4012 return false;
4013 // Operands 1 and 2 are commutable, if we switch the opcode.
4014 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2)
4015 case RISCV::QC_SELECTIEQ:
4016 case RISCV::QC_SELECTINE:
4017 case RISCV::QC_SELECTIIEQ:
4018 case RISCV::QC_SELECTIINE:
4019 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
4020 case RISCV::QC_MVEQ:
4021 case RISCV::QC_MVNE:
4022 case RISCV::QC_MVLT:
4023 case RISCV::QC_MVGE:
4024 case RISCV::QC_MVLTU:
4025 case RISCV::QC_MVGEU:
4026 case RISCV::QC_MVEQI:
4027 case RISCV::QC_MVNEI:
4028 case RISCV::QC_MVLTI:
4029 case RISCV::QC_MVGEI:
4030 case RISCV::QC_MVLTUI:
4031 case RISCV::QC_MVGEUI:
4032 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 4);
4033 case RISCV::TH_MULA:
4034 case RISCV::TH_MULAW:
4035 case RISCV::TH_MULAH:
4036 case RISCV::TH_MULS:
4037 case RISCV::TH_MULSW:
4038 case RISCV::TH_MULSH:
4039 // Operands 2 and 3 are commutable.
4040 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
4041 case RISCV::PseudoCCMOVGPRNoX0:
4042 case RISCV::PseudoCCMOVGPR:
4043 // Operands 4 and 5 are commutable.
4044 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
4045 case CASE_RVV_OPCODE(VADD_VV):
4046 case CASE_RVV_OPCODE(VAND_VV):
4047 case CASE_RVV_OPCODE(VOR_VV):
4048 case CASE_RVV_OPCODE(VXOR_VV):
4049 case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
4050 case CASE_RVV_OPCODE_MASK(VMSNE_VV):
4051 case CASE_RVV_OPCODE(VMIN_VV):
4052 case CASE_RVV_OPCODE(VMINU_VV):
4053 case CASE_RVV_OPCODE(VMAX_VV):
4054 case CASE_RVV_OPCODE(VMAXU_VV):
4055 case CASE_RVV_OPCODE(VMUL_VV):
4056 case CASE_RVV_OPCODE(VMULH_VV):
4057 case CASE_RVV_OPCODE(VMULHU_VV):
4058 case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
4059 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
4060 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
4061 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
4062 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
4063 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
4064 case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
4065 case CASE_RVV_OPCODE(VSADD_VV):
4066 case CASE_RVV_OPCODE(VSADDU_VV):
4067 case CASE_RVV_OPCODE(VAADD_VV):
4068 case CASE_RVV_OPCODE(VAADDU_VV):
4069 case CASE_RVV_OPCODE(VSMUL_VV):
4070 // Operands 2 and 3 are commutable.
4071 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
4072 case CASE_VFMA_SPLATS(FMADD):
4073 case CASE_VFMA_SPLATS(FMSUB):
4074 case CASE_VFMA_SPLATS(FMACC):
4075 case CASE_VFMA_SPLATS(FMSAC):
// NOTE(review): two case lines (original 4076-4077) appear elided from this
// listing — verify against upstream.
4078 case CASE_VFMA_SPLATS(FNMACC):
4079 case CASE_VFMA_SPLATS(FNMSAC):
4080 case CASE_VFMA_OPCODE_VV(FMACC):
4081 case CASE_VFMA_OPCODE_VV(FMSAC):
4082 case CASE_VFMA_OPCODE_VV(FNMACC):
4083 case CASE_VFMA_OPCODE_VV(FNMSAC):
4084 case CASE_VMA_OPCODE_LMULS(MADD, VX):
4085 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
4086 case CASE_VMA_OPCODE_LMULS(MACC, VX):
4087 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
4088 case CASE_VMA_OPCODE_LMULS(MACC, VV):
4089 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
4090 // If the tail policy is undisturbed we can't commute.
4091 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
4092 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
4093 1) == 0)
4094 return false;
4095
4096 // For these instructions we can only swap operand 1 and operand 3 by
4097 // changing the opcode.
4098 unsigned CommutableOpIdx1 = 1;
4099 unsigned CommutableOpIdx2 = 3;
4100 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
4101 CommutableOpIdx2))
4102 return false;
4103 return true;
4104 }
4105 case CASE_VFMA_OPCODE_VV(FMADD):
// NOTE(review): three case lines (original 4106-4108) appear elided from
// this listing — verify against upstream.
4109 case CASE_VMA_OPCODE_LMULS(MADD, VV):
4110 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
4111 // If the tail policy is undisturbed we can't commute.
4112 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
4113 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
4114 1) == 0)
4115 return false;
4116
4117 // For these instructions we have more freedom. We can commute with the
4118 // other multiplicand or with the addend/subtrahend/minuend.
4119
4120 // Any fixed operand must be from source 1, 2 or 3.
4121 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
4122 return false;
4123 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
4124 return false;
4125
4126 // If both ops are fixed one must be the tied source.
4127 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
4128 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
4129 return false;
4130
4131 // Look for two different register operands assumed to be commutable
4132 // regardless of the FMA opcode. The FMA opcode is adjusted later if
4133 // needed.
4134 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
4135 SrcOpIdx2 == CommuteAnyOperandIndex) {
4136 // At least one of operands to be commuted is not specified and
4137 // this method is free to choose appropriate commutable operands.
4138 unsigned CommutableOpIdx1 = SrcOpIdx1;
4139 if (SrcOpIdx1 == SrcOpIdx2) {
4140 // Both of operands are not fixed. Set one of commutable
4141 // operands to the tied source.
4142 CommutableOpIdx1 = 1;
4143 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
4144 // Only one of the operands is not fixed.
4145 CommutableOpIdx1 = SrcOpIdx2;
4146 }
4147
4148 // CommutableOpIdx1 is well defined now. Let's choose another commutable
4149 // operand and assign its index to CommutableOpIdx2.
4150 unsigned CommutableOpIdx2;
4151 if (CommutableOpIdx1 != 1) {
4152 // If we haven't already used the tied source, we must use it now.
4153 CommutableOpIdx2 = 1;
4154 } else {
4155 Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
4156
4157 // The commuted operands should have different registers.
4158 // Otherwise, the commute transformation does not change anything and
4159 // is useless. We use this as a hint to make our decision.
4160 if (Op1Reg != MI.getOperand(2).getReg())
4161 CommutableOpIdx2 = 2;
4162 else
4163 CommutableOpIdx2 = 3;
4164 }
4165
4166 // Assign the found pair of commutable indices to SrcOpIdx1 and
4167 // SrcOpIdx2 to return those values.
4168 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
4169 CommutableOpIdx2))
4170 return false;
4171 }
4172
4173 return true;
4174 }
4175 }
4176
4177 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
4178}
4179
4180// clang-format off
// Opcode-swap helpers for commuteInstructionImpl: each expands into
// `case old: Opc = new; break;` arms for every LMUL (and SEW) variant.
4181#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
4182 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
4183 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
4184 break;
4185
4186#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
4187 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
4188 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
4189 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
4190 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
4191 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
4192 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
4193 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
4194
4195// VFMA depends on SEW.
4196#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
4197 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
4198 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
4199 break;
4200
4201#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
4202 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
4203 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
4204 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
4205 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
4206
4207#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
4208 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
4209 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
4210
4211#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
4212 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
4213 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
4214
4215#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
4216 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
4217 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VV, E16) \
4218 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
4219 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
4220
4221#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
4222 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
4223 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VFPR16, E16) \
4224 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
4225 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
4226// clang-format on
4227
4229 bool NewMI,
4230 unsigned OpIdx1,
4231 unsigned OpIdx2) const {
4232 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
4233 if (NewMI)
4234 return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
4235 return MI;
4236 };
4237
4238 switch (MI.getOpcode()) {
4239 case RISCV::TH_MVEQZ:
4240 case RISCV::TH_MVNEZ: {
4241 auto &WorkingMI = cloneIfNew(MI);
4242 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
4243 : RISCV::TH_MVEQZ));
4244 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
4245 OpIdx2);
4246 }
4247 case RISCV::QC_SELECTIEQ:
4248 case RISCV::QC_SELECTINE:
4249 case RISCV::QC_SELECTIIEQ:
4250 case RISCV::QC_SELECTIINE:
4251 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
4252 case RISCV::QC_MVEQ:
4253 case RISCV::QC_MVNE:
4254 case RISCV::QC_MVLT:
4255 case RISCV::QC_MVGE:
4256 case RISCV::QC_MVLTU:
4257 case RISCV::QC_MVGEU:
4258 case RISCV::QC_MVEQI:
4259 case RISCV::QC_MVNEI:
4260 case RISCV::QC_MVLTI:
4261 case RISCV::QC_MVGEI:
4262 case RISCV::QC_MVLTUI:
4263 case RISCV::QC_MVGEUI: {
4264 auto &WorkingMI = cloneIfNew(MI);
4265 WorkingMI.setDesc(get(getInverseXqcicmOpcode(MI.getOpcode())));
4266 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
4267 OpIdx2);
4268 }
4269 case RISCV::PseudoCCMOVGPRNoX0:
4270 case RISCV::PseudoCCMOVGPR: {
4271 // CCMOV can be commuted by inverting the condition.
4272 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
4274 auto &WorkingMI = cloneIfNew(MI);
4275 WorkingMI.getOperand(3).setImm(CC);
4276 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
4277 OpIdx1, OpIdx2);
4278 }
4279 case CASE_VFMA_SPLATS(FMACC):
4280 case CASE_VFMA_SPLATS(FMADD):
4281 case CASE_VFMA_SPLATS(FMSAC):
4282 case CASE_VFMA_SPLATS(FMSUB):
4283 case CASE_VFMA_SPLATS(FNMACC):
4285 case CASE_VFMA_SPLATS(FNMSAC):
4287 case CASE_VFMA_OPCODE_VV(FMACC):
4288 case CASE_VFMA_OPCODE_VV(FMSAC):
4289 case CASE_VFMA_OPCODE_VV(FNMACC):
4290 case CASE_VFMA_OPCODE_VV(FNMSAC):
4291 case CASE_VMA_OPCODE_LMULS(MADD, VX):
4292 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
4293 case CASE_VMA_OPCODE_LMULS(MACC, VX):
4294 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
4295 case CASE_VMA_OPCODE_LMULS(MACC, VV):
4296 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
4297 // It only make sense to toggle these between clobbering the
4298 // addend/subtrahend/minuend one of the multiplicands.
4299 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
4300 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
4301 unsigned Opc;
4302 switch (MI.getOpcode()) {
4303 default:
4304 llvm_unreachable("Unexpected opcode");
4305 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
4306 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
4313 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
4317 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
4318 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
4319 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
4320 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
4321 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
4322 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
4323 }
4324
4325 auto &WorkingMI = cloneIfNew(MI);
4326 WorkingMI.setDesc(get(Opc));
4327 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4328 OpIdx1, OpIdx2);
4329 }
4330 case CASE_VFMA_OPCODE_VV(FMADD):
4334 case CASE_VMA_OPCODE_LMULS(MADD, VV):
4335 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
4336 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
4337 // If one of the operands, is the addend we need to change opcode.
4338 // Otherwise we're just swapping 2 of the multiplicands.
4339 if (OpIdx1 == 3 || OpIdx2 == 3) {
4340 unsigned Opc;
4341 switch (MI.getOpcode()) {
4342 default:
4343 llvm_unreachable("Unexpected opcode");
4344 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
4348 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
4349 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
4350 }
4351
4352 auto &WorkingMI = cloneIfNew(MI);
4353 WorkingMI.setDesc(get(Opc));
4354 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4355 OpIdx1, OpIdx2);
4356 }
4357 // Let the default code handle it.
4358 break;
4359 }
4360 }
4361
4362 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
4363}
4364
4365#undef CASE_VMA_CHANGE_OPCODE_COMMON
4366#undef CASE_VMA_CHANGE_OPCODE_LMULS
4367#undef CASE_VFMA_CHANGE_OPCODE_COMMON
4368#undef CASE_VFMA_CHANGE_OPCODE_LMULS_M1
4369#undef CASE_VFMA_CHANGE_OPCODE_LMULS_MF2
4370#undef CASE_VFMA_CHANGE_OPCODE_LMULS_MF4
4371#undef CASE_VFMA_CHANGE_OPCODE_VV
4372#undef CASE_VFMA_CHANGE_OPCODE_SPLATS
4373
4374#undef CASE_RVV_OPCODE_UNMASK_LMUL
4375#undef CASE_RVV_OPCODE_MASK_LMUL
4376#undef CASE_RVV_OPCODE_LMUL
4377#undef CASE_RVV_OPCODE_UNMASK_WIDEN
4378#undef CASE_RVV_OPCODE_UNMASK
4379#undef CASE_RVV_OPCODE_MASK_WIDEN
4380#undef CASE_RVV_OPCODE_MASK
4381#undef CASE_RVV_OPCODE_WIDEN
4382#undef CASE_RVV_OPCODE
4383
4384#undef CASE_VMA_OPCODE_COMMON
4385#undef CASE_VMA_OPCODE_LMULS
4386#undef CASE_VFMA_OPCODE_COMMON
4387#undef CASE_VFMA_OPCODE_LMULS_M1
4388#undef CASE_VFMA_OPCODE_LMULS_MF2
4389#undef CASE_VFMA_OPCODE_LMULS_MF4
4390#undef CASE_VFMA_OPCODE_VV
4391#undef CASE_VFMA_SPLATS
4392
  // Peephole simplification: rewrite MI in place into a simpler canonical
  // form (usually ADDI) when an operand is the zero register X0 or the two
  // source registers coincide. Returns true when MI was rewritten.
  switch (MI.getOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::OR:
  case RISCV::XOR:
    // Normalize (so we hit the next if clause).
    // add/[x]or rd, zero, rs => add/[x]or rd, rs, zero
    if (MI.getOperand(1).getReg() == RISCV::X0)
      commuteInstruction(MI);
    // add/[x]or rd, rs, zero => addi rd, rs, 0
    if (MI.getOperand(2).getReg() == RISCV::X0) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    // xor rd, rs, rs => addi rd, zero, 0
    if (MI.getOpcode() == RISCV::XOR &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      MI.getOperand(1).setReg(RISCV::X0);
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::ORI:
  case RISCV::XORI:
    // [x]ori rd, zero, N => addi rd, zero, N
    if (MI.getOperand(1).getReg() == RISCV::X0) {
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::SUB:
    // sub rd, rs, zero => addi rd, rs, 0
    if (MI.getOperand(2).getReg() == RISCV::X0) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::SUBW:
    // subw rd, rs, zero => addiw rd, rs, 0
    if (MI.getOperand(2).getReg() == RISCV::X0) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDIW));
      return true;
    }
    break;
  case RISCV::ADDW:
    // Normalize (so we hit the next if clause).
    // addw rd, zero, rs => addw rd, rs, zero
    if (MI.getOperand(1).getReg() == RISCV::X0)
      commuteInstruction(MI);
    // addw rd, rs, zero => addiw rd, rs, 0
    if (MI.getOperand(2).getReg() == RISCV::X0) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDIW));
      return true;
    }
    break;
  case RISCV::SH1ADD:
  case RISCV::SH1ADD_UW:
  case RISCV::SH2ADD:
  case RISCV::SH2ADD_UW:
  case RISCV::SH3ADD:
  case RISCV::SH3ADD_UW:
    // shNadd[.uw] rd, zero, rs => addi rd, rs, 0
    if (MI.getOperand(1).getReg() == RISCV::X0) {
      MI.removeOperand(1);
      MI.addOperand(MachineOperand::CreateImm(0));
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    // shNadd[.uw] rd, rs, zero => slli[.uw] rd, rs, N
    if (MI.getOperand(2).getReg() == RISCV::X0) {
      MI.removeOperand(2);
      unsigned Opc = MI.getOpcode();
      if (Opc == RISCV::SH1ADD_UW || Opc == RISCV::SH2ADD_UW ||
          Opc == RISCV::SH3ADD_UW) {
        MI.setDesc(get(RISCV::SLLI_UW));
        return true;
      }
      MI.setDesc(get(RISCV::SLLI));
      return true;
    }
    break;
  case RISCV::AND:
  case RISCV::MUL:
  case RISCV::MULH:
  case RISCV::MULHSU:
  case RISCV::MULHU:
  case RISCV::MULW:
    // and rd, zero, rs => addi rd, zero, 0
    // mul* rd, zero, rs => addi rd, zero, 0
    // and rd, rs, zero => addi rd, zero, 0
    // mul* rd, rs, zero => addi rd, zero, 0
    if (MI.getOperand(1).getReg() == RISCV::X0 ||
        MI.getOperand(2).getReg() == RISCV::X0) {
      MI.getOperand(1).setReg(RISCV::X0);
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::ANDI:
    // andi rd, zero, C => addi rd, zero, 0
    if (MI.getOperand(1).getReg() == RISCV::X0) {
      MI.getOperand(2).setImm(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::SLL:
  case RISCV::SRL:
  case RISCV::SRA:
    // shift rd, zero, rs => addi rd, zero, 0
    if (MI.getOperand(1).getReg() == RISCV::X0) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    // shift rd, rs, zero => addi rd, rs, 0
    if (MI.getOperand(2).getReg() == RISCV::X0) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::SLLW:
  case RISCV::SRLW:
  case RISCV::SRAW:
    // shiftw rd, zero, rs => addi rd, zero, 0
    if (MI.getOperand(1).getReg() == RISCV::X0) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::SLLI:
  case RISCV::SRLI:
  case RISCV::SRAI:
  case RISCV::SLLIW:
  case RISCV::SRLIW:
  case RISCV::SRAIW:
  case RISCV::SLLI_UW:
    // shiftimm rd, zero, N => addi rd, zero, 0
    if (MI.getOperand(1).getReg() == RISCV::X0) {
      MI.getOperand(2).setImm(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::SLTU:
  case RISCV::ADD_UW:
    // sltu rd, zero, zero => addi rd, zero, 0
    // add.uw rd, zero, zero => addi rd, zero, 0
    if (MI.getOperand(1).getReg() == RISCV::X0 &&
        MI.getOperand(2).getReg() == RISCV::X0) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    // add.uw rd, zero, rs => addi rd, rs, 0
    if (MI.getOpcode() == RISCV::ADD_UW &&
        MI.getOperand(1).getReg() == RISCV::X0) {
      MI.removeOperand(1);
      MI.addOperand(MachineOperand::CreateImm(0));
      MI.setDesc(get(RISCV::ADDI));
      // NOTE(review): unlike the sibling rewrites above, this path falls
      // through to `return false` after mutating MI — confirm whether a
      // `return true` is intended here.
    }
    break;
  case RISCV::SLTIU:
    // sltiu rd, zero, NZC => addi rd, zero, 1
    // sltiu rd, zero, 0 => addi rd, zero, 0
    if (MI.getOperand(1).getReg() == RISCV::X0) {
      MI.getOperand(2).setImm(MI.getOperand(2).getImm() != 0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::SEXT_H:
  case RISCV::SEXT_B:
  case RISCV::ZEXT_H_RV32:
  case RISCV::ZEXT_H_RV64:
    // sext.[hb] rd, zero => addi rd, zero, 0
    // zext.h rd, zero => addi rd, zero, 0
    if (MI.getOperand(1).getReg() == RISCV::X0) {
      MI.addOperand(MachineOperand::CreateImm(0));
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::MIN:
  case RISCV::MINU:
  case RISCV::MAX:
  case RISCV::MAXU:
    // min|max rd, rs, rs => addi rd, rs, 0
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      MI.getOperand(2).ChangeToImmediate(0);
      MI.setDesc(get(RISCV::ADDI));
      return true;
    }
    break;
  case RISCV::BEQ:
  case RISCV::BNE:
    // b{eq,ne} zero, rs, imm => b{eq,ne} rs, zero, imm
    if (MI.getOperand(0).getReg() == RISCV::X0) {
      MachineOperand MO0 = MI.getOperand(0);
      MI.removeOperand(0);
      MI.insert(MI.operands_begin() + 1, {MO0});
    }
    break;
  case RISCV::BLTU:
    // bltu zero, rs, imm => bne rs, zero, imm
    if (MI.getOperand(0).getReg() == RISCV::X0) {
      MachineOperand MO0 = MI.getOperand(0);
      MI.removeOperand(0);
      MI.insert(MI.operands_begin() + 1, {MO0});
      MI.setDesc(get(RISCV::BNE));
    }
    break;
  case RISCV::BGEU:
    // bgeu zero, rs, imm => beq rs, zero, imm
    if (MI.getOperand(0).getReg() == RISCV::X0) {
      MachineOperand MO0 = MI.getOperand(0);
      MI.removeOperand(0);
      MI.insert(MI.operands_begin() + 1, {MO0});
      MI.setDesc(get(RISCV::BEQ));
    }
    break;
  }
  return false;
}
4629
// clang-format off
// Helper macros used by convertToThreeAddress: the _TIED widening-op pseudos
// (destination tied to the wide source) are matched and mapped to their
// untied three-address forms across all applicable LMUL/SEW combinations.
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
  RISCV::PseudoV##OP##_##LMUL##_TIED

#define CASE_WIDEOP_OPCODE_LMULS(OP) \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M4)

#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
  case RISCV::PseudoV##OP##_##LMUL##_TIED: \
    NewOpc = RISCV::PseudoV##OP##_##LMUL; \
    break;

#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)

// FP Widening Ops may be SEW aware. Create SEW aware cases for these cases.
#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
  RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED

#define CASE_FP_WIDEOP_OPCODE_LMULS(OP) \
  CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32) \

#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
  case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
    NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
    break;

#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \

// _ALT variants only exist for E16 (altfp half-precision ops).
#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP) \
  CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16)

#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)
// clang-format on
4700
                                                   LiveVariables *LV,
                                                   LiveIntervals *LIS) const {
  // Convert a tied widening-op pseudo (_TIED form, destination tied to the
  // wide source) into its untied three-address form. Only legal when the
  // tail policy is agnostic (bit 0 of the policy operand set). Returns the
  // new instruction, or nullptr if no conversion is possible. LV/LIS, when
  // provided, are updated to reflect the replacement.
  switch (MI.getOpcode()) {
  default:
    return nullptr;
  case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV):
  case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWSUB_ALT_WV):
  case CASE_FP_WIDEOP_OPCODE_LMULS(FWADD_WV):
  case CASE_FP_WIDEOP_OPCODE_LMULS(FWSUB_WV): {
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 7 &&
           "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
    // If the tail policy is undisturbed we can't convert.
    if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
         1) == 0)
      return nullptr;
    // clang-format off
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    }
    // clang-format on

    // Rebuild the instruction in untied form: the old tied source becomes an
    // explicit undef pass-through operand.
    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(MI.getOperand(0).getReg(), RegState::Undef)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5))
              .add(MI.getOperand(6));
    break;
  }
  case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
    // If the tail policy is undisturbed we can't convert.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 6);
    if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
         1) == 0)
      return nullptr;

    // clang-format off
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    }
    // clang-format on

    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(MI.getOperand(0).getReg(), RegState::Undef)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5));
    break;
  }
  }
  MIB.copyImplicitOps(MI);

  // Transfer kill flags from the old instruction to the replacement.
  if (LV) {
    unsigned NumOps = MI.getNumOperands();
    for (unsigned I = 1; I < NumOps; ++I) {
      MachineOperand &Op = MI.getOperand(I);
      if (Op.isReg() && Op.isKill())
        LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
    }
  }

  if (LIS) {
    SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);

    if (MI.getOperand(0).isEarlyClobber()) {
      // Use operand 1 was tied to early-clobber def operand 0, so its live
      // interval could have ended at an early-clobber slot. Now they are not
      // tied we need to update it to the normal register slot.
      LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
      if (S->end == Idx.getRegSlot(true))
        S->end = Idx.getRegSlot();
    }
  }

  return MIB;
}
4805
4806#undef CASE_WIDEOP_OPCODE_COMMON
4807#undef CASE_WIDEOP_OPCODE_LMULS
4808#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
4809#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
4810#undef CASE_FP_WIDEOP_OPCODE_COMMON
4811#undef CASE_FP_WIDEOP_OPCODE_LMULS
4812#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
4813#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
4814
                                     Register DestReg, uint32_t Amount,
                                     MachineInstr::MIFlag Flag) const {
  // Multiply DestReg in place by the constant Amount, picking the cheapest
  // available expansion: single shift for powers of two, Zba shXadd (+SLLI)
  // for 3/5/9-shaped constants, shift+add/sub for 2^n +/- 1, an explicit MUL
  // when Zmmul is present, and otherwise a generic shift-and-accumulate loop.
  if (llvm::has_single_bit<uint32_t>(Amount)) {
    uint32_t ShiftAmount = Log2_32(Amount);
    if (ShiftAmount == 0)
      return;
    BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
  } else if (int ShXAmount, ShiftAmount;
             STI.hasShlAdd(3) &&
             (ShXAmount = isShifted359(Amount, ShiftAmount)) != 0) {
    // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
    unsigned Opc;
    switch (ShXAmount) {
    case 1:
      Opc = RISCV::SH1ADD;
      break;
    case 2:
      Opc = RISCV::SH2ADD;
      break;
    case 3:
      Opc = RISCV::SH3ADD;
      break;
    default:
      llvm_unreachable("unexpected result of isShifted359");
    }
    // Multiply by the power-of-two factor first, then by 3/5/9 via shXadd.
    if (ShiftAmount)
      BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
          .addReg(DestReg, RegState::Kill)
          .addImm(ShiftAmount)
          .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(Opc), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(DestReg)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(Amount - 1)) {
    // Amount == 2^n + 1: DestReg = (DestReg << n) + DestReg.
    Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(Amount - 1);
    BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
        .addReg(DestReg)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
        .addReg(ScaledRegister, RegState::Kill)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(Amount + 1)) {
    // Amount == 2^n - 1: DestReg = (DestReg << n) - DestReg.
    Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(Amount + 1);
    BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
        .addReg(DestReg)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
        .addReg(ScaledRegister, RegState::Kill)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else if (STI.hasStdExtZmmul()) {
    // Materialize the constant and use a real multiply.
    Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    movImm(MBB, II, DL, N, Amount, Flag);
    BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else {
    // Generic fallback: walk the set bits of Amount, shifting DestReg up to
    // each bit position and accumulating partial products in Acc.
    Register Acc;
    uint32_t PrevShiftAmount = 0;
    for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
      if (Amount & (1U << ShiftAmount)) {
        if (ShiftAmount)
          BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
              .addReg(DestReg, RegState::Kill)
              .addImm(ShiftAmount - PrevShiftAmount)
              .setMIFlag(Flag);
        if (Amount >> (ShiftAmount + 1)) {
          // If we don't have an accumulator yet, create it and copy DestReg.
          if (!Acc) {
            Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
            BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
                .addReg(DestReg)
                .setMIFlag(Flag);
          } else {
            BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
                .addReg(Acc, RegState::Kill)
                .addReg(DestReg)
                .setMIFlag(Flag);
          }
        }
        PrevShiftAmount = ShiftAmount;
      }
    }
    assert(Acc && "Expected valid accumulator");
    BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(Acc, RegState::Kill)
        .setMIFlag(Flag);
  }
}
4918
  // Target-specific MachineMemOperand flag names for MIR serialization of the
  // RISC-V non-temporal domain bits; the strings must remain stable because
  // they appear verbatim in serialized MIR.
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
       {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
  return ArrayRef(TargetFlags);
}
4926
  // At -O3 (Aggressive) and above use the subtarget-tuned threshold for tail
  // duplication; otherwise keep the conservative default of 2 instructions.
  return OptLevel >= CodeGenOptLevel::Aggressive
             ? STI.getTailDupAggressiveThreshold()
             : 2;
}
4932
  // RVV lacks any support for immediate addressing for stack addresses, so be
  // conservative.
  unsigned Opcode = MI.getOpcode();
  // Only opcodes known to the generated RVV pseudo-info table qualify for the
  // conservative RVV stack-access treatment.
  if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
      return false;
  return true;
}
4942
/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
/// Such a COPY has a physical destination register that belongs to an RVV
/// register class (checked via the minimal physical register class).
                                 const MachineInstr &MI) {
  return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
             TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
}
4950
/// For Zvlsseg segment spill/reload pseudos, return the pair
/// (number of register fields NF, LMUL of each field); std::nullopt for any
/// other opcode.
std::optional<std::pair<unsigned, unsigned>>
  switch (Opcode) {
  default:
    return std::nullopt;
  case RISCV::PseudoVSPILL2_M1:
  case RISCV::PseudoVRELOAD2_M1:
    return std::make_pair(2u, 1u);
  case RISCV::PseudoVSPILL2_M2:
  case RISCV::PseudoVRELOAD2_M2:
    return std::make_pair(2u, 2u);
  case RISCV::PseudoVSPILL2_M4:
  case RISCV::PseudoVRELOAD2_M4:
    return std::make_pair(2u, 4u);
  case RISCV::PseudoVSPILL3_M1:
  case RISCV::PseudoVRELOAD3_M1:
    return std::make_pair(3u, 1u);
  case RISCV::PseudoVSPILL3_M2:
  case RISCV::PseudoVRELOAD3_M2:
    return std::make_pair(3u, 2u);
  case RISCV::PseudoVSPILL4_M1:
  case RISCV::PseudoVRELOAD4_M1:
    return std::make_pair(4u, 1u);
  case RISCV::PseudoVSPILL4_M2:
  case RISCV::PseudoVRELOAD4_M2:
    return std::make_pair(4u, 2u);
  case RISCV::PseudoVSPILL5_M1:
  case RISCV::PseudoVRELOAD5_M1:
    return std::make_pair(5u, 1u);
  case RISCV::PseudoVSPILL6_M1:
  case RISCV::PseudoVRELOAD6_M1:
    return std::make_pair(6u, 1u);
  case RISCV::PseudoVSPILL7_M1:
  case RISCV::PseudoVRELOAD7_M1:
    return std::make_pair(7u, 1u);
  case RISCV::PseudoVSPILL8_M1:
  case RISCV::PseudoVRELOAD8_M1:
    return std::make_pair(8u, 1u);
  }
}
4991
4992bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
4993 int16_t MI1FrmOpIdx =
4994 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
4995 int16_t MI2FrmOpIdx =
4996 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
4997 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
4998 return false;
4999 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
5000 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
5001 return FrmOp1.getImm() == FrmOp2.getImm();
5002}
5003
/// Return how many low bits of the scalar (x-register) operand the given
/// vector-scalar MC opcode actually reads, given log2(SEW): lg2(SEW) for
/// single-width shifts, lg2(2*SEW) for narrowing/widening shifts, SEW bits
/// for the arithmetic/logic/move opcodes listed, or std::nullopt when the
/// demanded bits are unknown (all other opcodes).
std::optional<unsigned>
RISCV::getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW) {
  switch (Opcode) {
  default:
    return std::nullopt;

  // 11.6. Vector Single-Width Shift Instructions
  case RISCV::VSLL_VX:
  case RISCV::VSRL_VX:
  case RISCV::VSRA_VX:
  // 12.4. Vector Single-Width Scaling Shift Instructions
  case RISCV::VSSRL_VX:
  case RISCV::VSSRA_VX:
  // Zvbb
  case RISCV::VROL_VX:
  case RISCV::VROR_VX:
    // Only the low lg2(SEW) bits of the shift-amount value are used.
    return Log2SEW;

  // 11.7 Vector Narrowing Integer Right Shift Instructions
  case RISCV::VNSRL_WX:
  case RISCV::VNSRA_WX:
  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
  case RISCV::VNCLIPU_WX:
  case RISCV::VNCLIP_WX:
  // Zvbb
  case RISCV::VWSLL_VX:
    // Only the low lg2(2*SEW) bits of the shift-amount value are used.
    return Log2SEW + 1;

  // 11.1. Vector Single-Width Integer Add and Subtract
  case RISCV::VADD_VX:
  case RISCV::VSUB_VX:
  case RISCV::VRSUB_VX:
  // 11.2. Vector Widening Integer Add/Subtract
  case RISCV::VWADDU_VX:
  case RISCV::VWSUBU_VX:
  case RISCV::VWADD_VX:
  case RISCV::VWSUB_VX:
  case RISCV::VWADDU_WX:
  case RISCV::VWSUBU_WX:
  case RISCV::VWADD_WX:
  case RISCV::VWSUB_WX:
  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
  case RISCV::VADC_VXM:
  case RISCV::VADC_VIM:
  case RISCV::VMADC_VXM:
  case RISCV::VMADC_VIM:
  case RISCV::VMADC_VX:
  case RISCV::VSBC_VXM:
  case RISCV::VMSBC_VXM:
  case RISCV::VMSBC_VX:
  // 11.5 Vector Bitwise Logical Instructions
  case RISCV::VAND_VX:
  case RISCV::VOR_VX:
  case RISCV::VXOR_VX:
  // 11.8. Vector Integer Compare Instructions
  case RISCV::VMSEQ_VX:
  case RISCV::VMSNE_VX:
  case RISCV::VMSLTU_VX:
  case RISCV::VMSLT_VX:
  case RISCV::VMSLEU_VX:
  case RISCV::VMSLE_VX:
  case RISCV::VMSGTU_VX:
  case RISCV::VMSGT_VX:
  // 11.9. Vector Integer Min/Max Instructions
  case RISCV::VMINU_VX:
  case RISCV::VMIN_VX:
  case RISCV::VMAXU_VX:
  case RISCV::VMAX_VX:
  // 11.10. Vector Single-Width Integer Multiply Instructions
  case RISCV::VMUL_VX:
  case RISCV::VMULH_VX:
  case RISCV::VMULHU_VX:
  case RISCV::VMULHSU_VX:
  // 11.11. Vector Integer Divide Instructions
  case RISCV::VDIVU_VX:
  case RISCV::VDIV_VX:
  case RISCV::VREMU_VX:
  case RISCV::VREM_VX:
  // 11.12. Vector Widening Integer Multiply Instructions
  case RISCV::VWMUL_VX:
  case RISCV::VWMULU_VX:
  case RISCV::VWMULSU_VX:
  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
  case RISCV::VMACC_VX:
  case RISCV::VNMSAC_VX:
  case RISCV::VMADD_VX:
  case RISCV::VNMSUB_VX:
  // 11.14. Vector Widening Integer Multiply-Add Instructions
  case RISCV::VWMACCU_VX:
  case RISCV::VWMACC_VX:
  case RISCV::VWMACCSU_VX:
  case RISCV::VWMACCUS_VX:
  // 11.15. Vector Integer Merge Instructions
  case RISCV::VMERGE_VXM:
  // 11.16. Vector Integer Move Instructions
  case RISCV::VMV_V_X:
  // 12.1. Vector Single-Width Saturating Add and Subtract
  case RISCV::VSADDU_VX:
  case RISCV::VSADD_VX:
  case RISCV::VSSUBU_VX:
  case RISCV::VSSUB_VX:
  // 12.2. Vector Single-Width Averaging Add and Subtract
  case RISCV::VAADDU_VX:
  case RISCV::VAADD_VX:
  case RISCV::VASUBU_VX:
  case RISCV::VASUB_VX:
  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
  case RISCV::VSMUL_VX:
  // 16.1. Integer Scalar Move Instructions
  case RISCV::VMV_S_X:
  // Zvbb
  case RISCV::VANDN_VX:
    return 1U << Log2SEW;
  }
}
5121
/// Map an RVV pseudo opcode to the underlying MC (real encoding) opcode via
/// the generated pseudo-info table; returns 0 when \p RVVPseudoOpcode is not
/// an RVV pseudo.
unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}
5129
/// Return log2 of the destination operand's effective element width given
/// the instruction description and log2(SEW). A result of 0 denotes a
/// destination EEW of 1; otherwise the destination EEW is SEW scaled by the
/// factor encoded in the descriptor (result constrained to [3, 6], i.e.
/// EEW in [8, 64]).
unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) {
  unsigned DestEEW =
  // EEW = 1
  if (DestEEW == 0)
    return 0;
  // EEW = SEW * n
  unsigned Scaled = Log2SEW + (DestEEW - 1);
  assert(Scaled >= 3 && Scaled <= 6);
  return Scaled;
}
5141
5142static std::optional<int64_t> getEffectiveImm(const MachineOperand &MO) {
5143 assert(MO.isImm() || MO.getReg().isVirtual());
5144 if (MO.isImm())
5145 return MO.getImm();
5146 const MachineInstr *Def =
5147 MO.getParent()->getMF()->getRegInfo().getVRegDef(MO.getReg());
5148 int64_t Imm;
5149 if (isLoadImm(Def, Imm))
5150 return Imm;
5151 return std::nullopt;
5152}
5153
/// Given two VL operands, do we know that LHS <= RHS? Must be used in SSA form.
  assert((LHS.isImm() || LHS.getParent()->getMF()->getRegInfo().isSSA()) &&
         (RHS.isImm() || RHS.getParent()->getMF()->getRegInfo().isSSA()));
  // The same virtual register trivially satisfies LHS <= RHS.
  if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
      LHS.getReg() == RHS.getReg())
    return true;
  // Nothing exceeds VLMAX.
  if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
    return true;
  // Zero is the smallest possible VL.
  if (LHS.isImm() && LHS.getImm() == 0)
    return true;
  // LHS is VLMAX but RHS is not (handled above), so LHS <= RHS cannot hold.
  if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
    return false;
  // Otherwise compare the values if both sides reduce to known constants.
  std::optional<int64_t> LHSImm = getEffectiveImm(LHS),
                         RHSImm = getEffectiveImm(RHS);
  if (!LHSImm || !RHSImm)
    return false;
  return LHSImm <= RHSImm;
}
5173
namespace {
// Loop-control bookkeeping handed to the MachinePipeliner: remembers the
// defining instructions of the loop-branch operands so they can be pinned to
// stage 0, and replays the normalized exit condition when asked.
class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  const MachineInstr *LHS;
  const MachineInstr *RHS;

public:
  RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
      : LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Make the instructions for loop control be placed in stage 0.
    // The predecessors of LHS/RHS are considered by the caller.
    if (LHS && MI == LHS)
      return true;
    if (RHS && MI == RHS)
      return true;
    return false;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &CondParam) override {
    // A branch instruction will be inserted as "if (Cond) goto epilogue".
    // Cond is normalized for such use.
    // The predecessors of the branch are assumed to have already been inserted.
    CondParam = Cond;
    return {};
  }

  // No preheader or trip-count adjustments are needed for this target.
  void setPreheader(MachineBasicBlock *NewPreheader) override {}

  void adjustTripCount(int TripCountAdjust) override {}
};
} // namespace
5210
// Analyze a candidate single-basic-block loop for software pipelining and
// return a RISCVPipelinerLoopInfo on success, or nullptr when the loop shape
// is unsupported (unanalyzable branch, infinite loop, unconditional branch,
// or PHI-defined compare operands).
// NOTE(review): the function signature line (original line 5212) is missing
// from this extracted listing — per the surrounding code this is
// RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const;
// confirm against the full source.
5211std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5213 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
// NOTE(review): the declaration of the local Cond operand vector (original
// line 5214) is missing from this listing; confirm in full source.
5215 if (analyzeBranch(*LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
5216 return nullptr;
5217
5218 // Infinite loops are not supported
5219 if (TBB == LoopBB && FBB == LoopBB)
5220 return nullptr;
5221
5222 // Must be conditional branch
5223 if (FBB == nullptr)
5224 return nullptr;
5225
5226 assert((TBB == LoopBB || FBB == LoopBB) &&
5227 "The Loop must be a single-basic-block loop");
5228
5229 // Normalization for createTripCountGreaterCondition()
// NOTE(review): the statement guarded by this if (original line 5231) is
// missing from this listing — presumably it inverts Cond (e.g. via
// reverseBranchCondition) so the branch exits the loop when taken; confirm
// in full source.
5230 if (TBB == LoopBB)
5232
5233 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
// Map a condition operand to the defining MachineInstr of its virtual
// register, or null for immediates / physical registers.
5234 auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * {
5235 if (!Op.isReg())
5236 return nullptr;
5237 Register Reg = Op.getReg();
5238 if (!Reg.isVirtual())
5239 return nullptr;
5240 return MRI.getVRegDef(Reg);
5241 };
5242
5243 const MachineInstr *LHS = FindRegDef(Cond[1]);
5244 const MachineInstr *RHS = FindRegDef(Cond[2]);
// PHI-defined compare operands would straddle pipeline stages; bail out.
5245 if (LHS && LHS->isPHI())
5246 return nullptr;
5247 if (RHS && RHS->isPHI())
5248 return nullptr;
5249
5250 return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond);
5251}
5252
5253// FIXME: We should remove this if we have a default generic scheduling model.
// Return true for opcodes that are high-latency definitions: scalar and
// vector integer divide/remainder and floating-point divide/sqrt families.
// NOTE(review): the function signature line (original line 5254) is missing
// from this extracted listing — per the class declaration this is
// RISCVInstrInfo::isHighLatencyDef(int Opc) const; confirm in full source.
// Vector pseudos are first mapped back to their MC opcode so a single
// switch covers both pseudo and MC forms.
5255 unsigned RVVMCOpcode = RISCV::getRVVMCOpcode(Opc);
5256 Opc = RVVMCOpcode ? RVVMCOpcode : Opc;
5257 switch (Opc) {
5258 default:
5259 return false;
5260 // Integer div/rem.
5261 case RISCV::DIV:
5262 case RISCV::DIVW:
5263 case RISCV::DIVU:
5264 case RISCV::DIVUW:
5265 case RISCV::REM:
5266 case RISCV::REMW:
5267 case RISCV::REMU:
5268 case RISCV::REMUW:
5269 // Floating-point div/sqrt.
5270 case RISCV::FDIV_H:
5271 case RISCV::FDIV_S:
5272 case RISCV::FDIV_D:
5273 case RISCV::FDIV_H_INX:
5274 case RISCV::FDIV_S_INX:
5275 case RISCV::FDIV_D_INX:
5276 case RISCV::FDIV_D_IN32X:
5277 case RISCV::FSQRT_H:
5278 case RISCV::FSQRT_S:
5279 case RISCV::FSQRT_D:
5280 case RISCV::FSQRT_H_INX:
5281 case RISCV::FSQRT_S_INX:
5282 case RISCV::FSQRT_D_INX:
5283 case RISCV::FSQRT_D_IN32X:
5284 // Vector integer div/rem
5285 case RISCV::VDIV_VV:
5286 case RISCV::VDIV_VX:
5287 case RISCV::VDIVU_VV:
5288 case RISCV::VDIVU_VX:
5289 case RISCV::VREM_VV:
5290 case RISCV::VREM_VX:
5291 case RISCV::VREMU_VV:
5292 case RISCV::VREMU_VX:
5293 // Vector floating-point div/sqrt.
5294 case RISCV::VFDIV_VV:
5295 case RISCV::VFDIV_VF:
5296 case RISCV::VFRDIV_VF:
5297 case RISCV::VFSQRT_V:
5298 case RISCV::VFRSQRT7_V:
5299 return true;
5300 }
5301}
5302
// Return true if MI is a COPY whose destination is a vector register; when
// LMul is nonzero, additionally require the destination register class to
// match that LMUL (fractional classes are matched by LMul == 1).
5303bool RISCVInstrInfo::isVRegCopy(const MachineInstr *MI, unsigned LMul) const {
5304 if (MI->getOpcode() != TargetOpcode::COPY)
5305 return false;
5306 const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
5307 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
5308
// Determine the destination's register class: virtual registers carry their
// class in MRI; physical registers use the minimal class containing them.
5309 Register DstReg = MI->getOperand(0).getReg();
5310 const TargetRegisterClass *RC = DstReg.isVirtual()
5311 ? MRI.getRegClass(DstReg)
5312 : TRI->getMinimalPhysRegClass(DstReg);
5313
// NOTE(review): the condition guarding this early return (original line
// 5314) is missing from this extracted listing — presumably a check that RC
// is an RVV register class; confirm against the full source.
5315 return false;
5316
// LMul == 0 means "any vector register class".
5317 if (!LMul)
5318 return true;
5319
5320 // TODO: Perhaps we could distinguish segment register classes (e.g. VRN3M2)
5321 // in the future.
// NOTE(review): the initializer expression (original line 5323) is missing
// from this listing — presumably it decodes the class's VLMUL into a
// (multiplier, is-fractional) pair; confirm against the full source.
5322 auto [RCLMul, RCFractional] =
5324 return (!RCFractional && LMul == RCLMul) || (RCFractional && LMul == 1);
5325}
5326
// Return true when the instruction's first memory operand is marked
// non-temporal, i.e. the instruction needs an NTL hint emitted before it.
// NOTE(review): the function signature line (original line 5327) is missing
// from this extracted listing — per the class declaration this is
// RISCVInstrInfo::requiresNTLHint(const MachineInstr &MI) const; confirm
// against the full source.
// No memory operands: nothing to hint.
5328 if (MI.memoperands_empty())
5329 return false;
5330
// Only the first memory operand's non-temporal flag is consulted here.
5331 MachineMemOperand *MMO = *(MI.memoperands_begin());
5332 if (!MMO->isNonTemporal())
5333 return false;
5334
5335 return true;
5336}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerDefault
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
@ Scaled
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
basic Basic Alias true
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static bool cannotInsertTailCall(const MachineBasicBlock &MBB)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP)
#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP)
#define CASE_FP_WIDEOP_OPCODE_LMULS(OP)
#define CASE_OPERAND_SIMM(NUM)
static std::optional< unsigned > getLMULForRVVWholeLoadStore(unsigned Opcode)
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP)
static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern)
std::optional< unsigned > getFoldedOpcode(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, const RISCVSubtarget &ST)
#define RVV_OPC_LMUL_CASE(OPC, INV)
#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP)
static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs)
static unsigned getAddendOperandIdx(unsigned Pattern)
#define CASE_RVV_OPCODE_UNMASK(OP)
#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP)
static cl::opt< bool > PreferWholeRegisterMove("riscv-prefer-whole-register-move", cl::init(false), cl::Hidden, cl::desc("Prefer whole register move for vector registers."))
#define CASE_VFMA_SPLATS(OP)
unsigned getPredicatedOpcode(unsigned Opcode)
#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP)
#define CASE_WIDEOP_OPCODE_LMULS(OP)
static bool isMIReadsReg(const MachineInstr &MI, const TargetRegisterInfo *TRI, MCRegister RegNo)
#define OPCODE_LMUL_MASK_CASE(OPC)
#define CASE_OPERAND_UIMM_LSB_ZEROS(BITS, SUFFIX)
static bool isFSUB(unsigned Opc)
#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE)
#define CASE_RVV_OPCODE(OP)
static std::optional< int64_t > getEffectiveImm(const MachineOperand &MO)
#define CASE_VFMA_OPCODE_VV(OP)
MachineOutlinerConstructionID
#define CASE_RVV_OPCODE_WIDEN(OP)
static unsigned getLoadPredicatedOpcode(unsigned Opcode)
static unsigned getSHXADDUWShiftAmount(unsigned Opc)
#define CASE_VMA_OPCODE_LMULS(OP, TYPE)
static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI, const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI, MachineBasicBlock::const_iterator &DefMBBI, RISCVVType::VLMUL LMul)
static bool isFMUL(unsigned Opc)
static unsigned getInverseXqcicmOpcode(unsigned Opcode)
static bool getFPPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce)
#define OPCODE_LMUL_CASE(OPC)
#define CASE_OPERAND_UIMM(NUM)
static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB, const MachineOperand &MO, unsigned OuterShiftAmt)
Utility routine that checks if.
static bool isCandidatePatchable(const MachineBasicBlock &MBB)
static bool isFADD(unsigned Opc)
static void genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
static bool isLoadImm(const MachineInstr *MI, int64_t &Imm)
static bool isMIModifiesReg(const MachineInstr &MI, const TargetRegisterInfo *TRI, MCRegister RegNo)
static bool canCombineFPFusedMultiply(const MachineInstr &Root, const MachineOperand &MO, bool DoRegPressureReduce)
static bool getSHXADDPatterns(const MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getFPFusedMultiplyPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce)
static cl::opt< MachineTraceStrategy > ForceMachineCombinerStrategy("riscv-force-machine-combiner-strategy", cl::Hidden, cl::desc("Force machine combiner to use a specific strategy for machine " "trace metrics evaluation."), cl::init(MachineTraceStrategy::TS_NumStrategies), cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local", "Local strategy."), clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr", "MinInstrCount strategy.")))
static unsigned getSHXADDShiftAmount(unsigned Opc)
#define CASE_RVV_OPCODE_MASK(OP)
#define RVV_OPC_LMUL_MASK_CASE(OPC, INV)
static MachineInstr * canFoldAsPredicatedOp(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII, const RISCVSubtarget &STI)
Identify instructions that can be folded into a CCMOV instruction, and return the defining instructio...
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
static LLVM_ABI DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
Attempts to merge LocA and LocB into a single location; see DebugLoc::getMergedLocation for more deta...
bool isBigEndian() const
Definition DataLayout.h:215
A debug info location.
Definition DebugLoc.h:123
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
LiveInterval - This class represents the liveness of a register, or stack slot.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
MCInstBuilder & addReg(MCRegister Reg)
Add a new register operand.
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
bool isConditionalBranch() const
Return true if this is a branch which may fall through to the next instruction or may transfer contro...
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
const FeatureBitset & getFeatureBits() const
Set of metadata that should be preserved when using BuildMI().
MachineInstrBundleIterator< const MachineInstr > const_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineInstrBundleIterator< const MachineInstr, true > const_reverse_iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setStackID(int ObjectIdx, uint8_t ID)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI void clearKillInfo()
Clears kill flags on all operands.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool IsKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg) const override
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, MachineInstr::MIFlag Flag=MachineInstr::NoFlags, bool DstRenamable=false, bool DstIsDead=false) const
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
void mulImm(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, const DebugLoc &DL, Register DestReg, uint32_t Amt, MachineInstr::MIFlag Flag) const
Generate code to multiply the value in DestReg by Amt - handles all the common optimizations for this...
static bool isPairableLdStInstOpc(unsigned Opc)
Return true if pairing the given load or store may be paired with another.
RISCVInstrInfo(const RISCVSubtarget &STI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &dl, int *BytesAdded=nullptr) const override
bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const override
static bool isLdStSafeToPair(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
void copyPhysRegVector(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RegClass) const
bool isReMaterializableImpl(const MachineInstr &MI) const override
MachineInstr * optimizeSelect(MachineInstr &MI, SmallPtrSetImpl< MachineInstr * > &SeenMIs, bool) const override
bool isVRegCopy(const MachineInstr *MI, unsigned LMul=0) const
Return true if MI is a COPY to a vector register of a specific LMul, or any kind of vector registers ...
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
void getReassociateOperandIndices(const MachineInstr &Root, unsigned Pattern, std::array< unsigned, 5 > &OperandIndices) const override
const RISCVSubtarget & STI
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
std::optional< unsigned > getInverseOpcode(unsigned Opcode) const override
bool simplifyInstruction(MachineInstr &MI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MBBI, unsigned Flags) const override
MachineTraceStrategy getMachineCombinerTraceStrategy() const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MCInst getNop() const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool analyzeCandidate(outliner::Candidate &C) const
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
bool requiresNTLHint(const MachineInstr &MI) const
Return true if the instruction requires an NTL hint to be emitted.
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const override
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc)
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
CombinerObjective getCombinerObjective(unsigned Pattern) const override
bool isHighLatencyDef(int Opc) const override
static bool evaluateCondBranch(RISCVCC::CondCode CC, int64_t C0, int64_t C1)
Return the result of the evaluation of C0 CC C1, where CC is a RISCVCC::CondCode.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
bool optimizeCondBranch(MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
static bool isFromLoadImm(const MachineRegisterInfo &MRI, const MachineOperand &Op, int64_t &Imm)
Return true if the operand is a load immediate instruction and sets Imm to the immediate value.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
const RISCVRegisterInfo * getRegisterInfo() const override
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const
Return true when \P Inst has reassociable operands in the same \P MBB.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const
Optional target hook that returns true if MBB is safe to outline from, and returns any target-specifi...
virtual void getReassociateOperandIndices(const MachineInstr &Root, unsigned Pattern, std::array< unsigned, 5 > &OperandIndices) const
The returned array encodes the operand index for each parameter because the operands may be commuted;...
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const
Return true when \P Inst has reassociable sibling.
virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
const uint8_t TSFlags
Configurable target specific flags.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getZero()
Definition TypeSize.h:349
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
CondCode getInverseBranchCondition(CondCode)
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static bool isValidRoundingMode(unsigned Mode)
static StringRef roundingModeToString(RoundingMode RndMode)
static unsigned getVecPolicyOpNum(const MCInstrDesc &Desc)
static bool usesMaskPolicy(uint64_t TSFlags)
static bool hasRoundModeOp(uint64_t TSFlags)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool hasVLOp(uint64_t TSFlags)
static MCRegister getTailExpandUseRegNo(const FeatureBitset &FeatureBits)
static int getFRMOpNum(const MCInstrDesc &Desc)
static int getVXRMOpNum(const MCInstrDesc &Desc)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool usesVXRM(uint64_t TSFlags)
static bool isRVVWideningReduction(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
@ OPERAND_UIMMLOG2XLEN_NONZERO
@ OPERAND_UIMM10_LSB00_NONZERO
@ OPERAND_SIMM10_LSB0000_NONZERO
static unsigned getNF(uint8_t TSFlags)
static RISCVVType::VLMUL getLMul(uint8_t TSFlags)
static bool isTailAgnostic(unsigned VType)
LLVM_ABI void printXSfmmVType(unsigned VType, raw_ostream &OS)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static bool isValidSEW(unsigned SEW)
LLVM_ABI void printVType(unsigned VType, raw_ostream &OS)
static bool isValidXSfmmVType(unsigned VTypeI)
static unsigned getSEW(unsigned VType)
static VLMUL getVLMUL(unsigned VType)
static bool isValidRoundingMode(unsigned Mode)
static StringRef roundingModeToString(RoundingMode RndMode)
bool hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2)
bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS)
Given two VL operands, do we know that LHS <= RHS?
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
unsigned getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
std::optional< std::pair< unsigned, unsigned > > isRVVSpillForZvlsseg(unsigned Opcode)
static constexpr unsigned RVVBitsPerBlock
bool isRVVSpill(const MachineInstr &MI)
static constexpr unsigned RVVBytesPerBlock
static constexpr int64_t VLMaxSentinel
bool isVectorCopy(const TargetRegisterInfo *TRI, const MachineInstr &MI)
Return true if MI is a copy that will be lowered to one or more vmvNr.vs.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
@ SHXADD_ADD_SLLI_OP2
@ SHXADD_ADD_SLLI_OP1
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
bool isValidAtomicOrdering(Int I)
constexpr RegState getKillRegState(bool B)
static const MachineMemOperand::Flags MONontemporalBit0
constexpr RegState getDeadRegState(bool B)
Op::Description Desc
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr RegState getRenamableRegState(bool B)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr RegState getDefRegState(bool B)
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
int isShifted359(T Value, int &Shift)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
This represents a simple continuous liveness interval for a value.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool isRVVRegClass(const TargetRegisterClass *RC)
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.