LLVM 19.0.0git
RISCVInstrInfo.cpp
1//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the RISC-V implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVInstrInfo.h"
15#include "RISCV.h"
17#include "RISCVSubtarget.h"
18#include "RISCVTargetMachine.h"
19#include "llvm/ADT/STLExtras.h"
36
37using namespace llvm;
38
39#define GEN_CHECK_COMPRESS_INSTR
40#include "RISCVGenCompressInstEmitter.inc"
41
42#define GET_INSTRINFO_CTOR_DTOR
43#define GET_INSTRINFO_NAMED_OPS
44#include "RISCVGenInstrInfo.inc"
45
47 "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
48 cl::desc("Prefer whole register move for vector registers."));
49
51 "riscv-force-machine-combiner-strategy", cl::Hidden,
52 cl::desc("Force machine combiner to use a specific strategy for machine "
53 "trace metrics evaluation."),
54 cl::init(MachineTraceStrategy::TS_NumStrategies),
55 cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
56 "Local strategy."),
57 clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
58 "MinInstrCount strategy.")));
59
61namespace llvm::RISCVVPseudosTable {
62using namespace RISCV;
63
64#define GET_RISCVVPseudosTable_IMPL
65#include "RISCVGenSearchableTables.inc"
66
67} // namespace llvm::RISCVVPseudosTable
68
69RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
70 : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
71 STI(STI) {}
72
73MCInst RISCVInstrInfo::getNop() const {
74 if (STI.hasStdExtCOrZca())
75 return MCInstBuilder(RISCV::C_NOP);
76 return MCInstBuilder(RISCV::ADDI)
77 .addReg(RISCV::X0)
78 .addReg(RISCV::X0)
79 .addImm(0);
80}
81
83 int &FrameIndex) const {
84 unsigned Dummy;
85 return isLoadFromStackSlot(MI, FrameIndex, Dummy);
86}
87
89 int &FrameIndex,
90 unsigned &MemBytes) const {
91 switch (MI.getOpcode()) {
92 default:
93 return 0;
94 case RISCV::LB:
95 case RISCV::LBU:
96 MemBytes = 1;
97 break;
98 case RISCV::LH:
99 case RISCV::LHU:
100 case RISCV::FLH:
101 MemBytes = 2;
102 break;
103 case RISCV::LW:
104 case RISCV::FLW:
105 case RISCV::LWU:
106 MemBytes = 4;
107 break;
108 case RISCV::LD:
109 case RISCV::FLD:
110 MemBytes = 8;
111 break;
112 }
113
114 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
115 MI.getOperand(2).getImm() == 0) {
116 FrameIndex = MI.getOperand(1).getIndex();
117 return MI.getOperand(0).getReg();
118 }
119
120 return 0;
121}
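// For example, `$x10 = LW %stack.3, 0` is reported as a stack reload: MemBytes
// is set to 4, FrameIndex to 3, and $x10 is returned. A load whose immediate
// offset is non-zero is conservatively not treated as a simple stack-slot load.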
122
124 int &FrameIndex) const {
125 unsigned Dummy;
126 return isStoreToStackSlot(MI, FrameIndex, Dummy);
127}
128
130 int &FrameIndex,
131 unsigned &MemBytes) const {
132 switch (MI.getOpcode()) {
133 default:
134 return 0;
135 case RISCV::SB:
136 MemBytes = 1;
137 break;
138 case RISCV::SH:
139 case RISCV::FSH:
140 MemBytes = 2;
141 break;
142 case RISCV::SW:
143 case RISCV::FSW:
144 MemBytes = 4;
145 break;
146 case RISCV::SD:
147 case RISCV::FSD:
148 MemBytes = 8;
149 break;
150 }
151
152 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
153 MI.getOperand(2).getImm() == 0) {
154 FrameIndex = MI.getOperand(1).getIndex();
155 return MI.getOperand(0).getReg();
156 }
157
158 return 0;
159}
160
161static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
162 unsigned NumRegs) {
163 return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
164}
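// For example, copying a 4-register tuple from v8_v9_v10_v11 to
// v10_v11_v12_v13 gives DstReg - SrcReg = 2 < 4, so a forward
// register-by-register copy would overwrite v10 and v11 before they are read;
// copyPhysRegVector below therefore emits the copy in reverse order.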
165
166static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
167 const MachineBasicBlock &MBB,
168 MachineBasicBlock::const_iterator MBBI,
169 MachineBasicBlock::const_iterator &DefMBBI,
170 RISCVII::VLMUL LMul) {
171 if (PreferWholeRegisterMove)
172 return false;
173
174 assert(MBBI->getOpcode() == TargetOpcode::COPY &&
175 "Unexpected COPY instruction.");
176 Register SrcReg = MBBI->getOperand(1).getReg();
178 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
179 bool FoundDef = false;
180 bool FirstVSetVLI = false;
181 unsigned FirstSEW = 0;
182 while (MBBI != MBB.begin()) {
183 --MBBI;
184 if (MBBI->isMetaInstruction())
185 continue;
186
187 if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
188 MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
189 MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
190 // There is a vsetvli between COPY and source define instruction.
191 // vy = def_vop ... (producing instruction)
192 // ...
193 // vsetvli
194 // ...
195 // vx = COPY vy
196 if (!FoundDef) {
197 if (!FirstVSetVLI) {
198 FirstVSetVLI = true;
199 unsigned FirstVType = MBBI->getOperand(2).getImm();
200 RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
201 FirstSEW = RISCVVType::getSEW(FirstVType);
202 // The first encountered vsetvli must have the same lmul as the
203 // register class of COPY.
204 if (FirstLMul != LMul)
205 return false;
206 }
207 // Only permit `vsetvli x0, x0, vtype` between COPY and the source
208 // define instruction.
209 if (MBBI->getOperand(0).getReg() != RISCV::X0)
210 return false;
211 if (MBBI->getOperand(1).isImm())
212 return false;
213 if (MBBI->getOperand(1).getReg() != RISCV::X0)
214 return false;
215 continue;
216 }
217
218 // MBBI is the first vsetvli before the producing instruction.
219 unsigned VType = MBBI->getOperand(2).getImm();
220 // If there is a vsetvli between COPY and the producing instruction.
221 if (FirstVSetVLI) {
222 // If SEW is different, return false.
223 if (RISCVVType::getSEW(VType) != FirstSEW)
224 return false;
225 }
226
227 // If the vsetvli is tail undisturbed, keep the whole register move.
228 if (!RISCVVType::isTailAgnostic(VType))
229 return false;
230
231 // The checking is conservative. We only have register classes for
232 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
233 // for fractional LMUL operations. However, we cannot use the vsetvli
234 // LMUL for widening operations, because the result of a widening
235 // operation is 2 x LMUL.
236 return LMul == RISCVVType::getVLMUL(VType);
237 } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
238 return false;
239 } else if (MBBI->getNumDefs()) {
240 // Check all the instructions which will change VL.
241 // For example, vleff has implicit def VL.
242 if (MBBI->modifiesRegister(RISCV::VL))
243 return false;
244
245 // Only convert whole register copies to vmv.v.v when the defining
246 // value appears in the explicit operands.
247 for (const MachineOperand &MO : MBBI->explicit_operands()) {
248 if (!MO.isReg() || !MO.isDef())
249 continue;
250 if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
251 // We only permit the source of the COPY to have the same LMUL as the
252 // defined operand.
253 // There are cases where we need to keep the whole register copy if the
254 // LMULs are different.
255 // For example,
256 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
257 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
258 // # The COPY may be created by vlmul_trunc intrinsic.
259 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
260 //
261 // After widening, the valid value will be 4 x e32 elements. If we
262 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
263 // FIXME: The COPY of subregister of Zvlsseg register will not be able
264 // to convert to vmv.v.[v|i] under the constraint.
265 if (MO.getReg() != SrcReg)
266 return false;
267
268 // For widening reduction instructions with an LMUL_1 input vector,
269 // checking only the LMUL is insufficient because the reduction
270 // result is always LMUL_1.
271 // For example,
272 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
273 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
274 // $v26 = COPY killed renamable $v8
275 // After widening, the valid value will be 1 x e16 elements. If we
276 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
277 uint64_t TSFlags = MBBI->getDesc().TSFlags;
278 if (RISCVII::isRVVWideningReduction(TSFlags))
279 return false;
280
281 // If the producing instruction does not depend on vsetvli, do not
282 // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
283 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
284 return false;
285
286 // Found the definition.
287 FoundDef = true;
288 DefMBBI = MBBI;
289 break;
290 }
291 }
292 }
293 }
294
295 return false;
296}
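// A sketch of a sequence this analysis accepts (illustrative MIR, assuming
// PreferWholeRegisterMove is off):
//   $v8 = PseudoVADD_VV_M1 ...           ; producer carrying VL/SEW operands
//   dead $x0 = PseudoVSETVLIX0 killed $x0, ... ; VL-preserving, tail agnostic, m1
//   $v9 = COPY $v8
// The intervening vsetvli keeps VL (x0, x0 form), is tail agnostic, and stays
// at LMUL = 1, so copyPhysRegVector below may lower the COPY to
// PseudoVMV_V_V_M1 using the producer's AVL and SEW operands.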
297
300 const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
301 const TargetRegisterClass *RegClass) const {
303 RISCVII::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
304 unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
305
306 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
307 uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
308 auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
309 assert(!Fractional && "Fractional LMUL is not possible here.");
310 unsigned NumRegs = NF * LMulVal;
311 bool ReversedCopy =
312 forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
313 if (ReversedCopy) {
314 // If the src and dest overlap when copying a tuple, we need to copy the
315 // registers in reverse.
316 SrcEncoding += NumRegs - 1;
317 DstEncoding += NumRegs - 1;
318 }
319
320 unsigned I = 0;
321 auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
322 -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
323 unsigned, unsigned> {
324 if (ReversedCopy) {
325 // For reversed copying, if there are enough aligned registers (8/4/2), we
326 // can do a larger copy (LMUL 8/4/2).
327 // Besides, we already know that DstEncoding is larger than SrcEncoding in
328 // forwardCopyWillClobberTuple, so the difference between DstEncoding and
329 // SrcEncoding must be >= the LMUL value we try to use in order to
330 // avoid clobbering.
331 uint16_t Diff = DstEncoding - SrcEncoding;
332 if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
333 DstEncoding % 8 == 7)
334 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
335 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
336 if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
337 DstEncoding % 4 == 3)
338 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
339 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
340 if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
341 DstEncoding % 2 == 1)
342 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
343 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
344 // Or we should do LMUL1 copying.
345 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
346 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
347 }
348
349 // For forward copying, if the source and destination register encodings
350 // are aligned to 8/4/2, we can do an LMUL 8/4/2 copy.
351 if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
352 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
353 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
354 if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
355 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
356 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
357 if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
358 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
359 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
360 // Or we should do LMUL1 copying.
361 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
362 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
363 };
364 auto FindRegWithEncoding = [&TRI](const TargetRegisterClass &RegClass,
365 uint16_t Encoding) {
366 ArrayRef<MCPhysReg> Regs = RegClass.getRegisters();
367 const auto *FoundReg = llvm::find_if(Regs, [&](MCPhysReg Reg) {
368 return TRI->getEncodingValue(Reg) == Encoding;
369 });
370 // We should always be able to find one valid register.
371 assert(FoundReg != Regs.end());
372 return *FoundReg;
373 };
374 while (I != NumRegs) {
375 // For non-segment copying, we only do this once as the registers are
376 // always aligned.
377 // For segment copying, we may do this several times. If the registers
378 // are aligned to a larger LMUL, we can eliminate some of the copies.
379 auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
380 GetCopyInfo(SrcEncoding, DstEncoding);
381 auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);
382
384 if (LMul == LMulCopied &&
385 isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
386 Opc = VVOpc;
387 if (DefMBBI->getOpcode() == VIOpc)
388 Opc = VIOpc;
389 }
390
391 // Emit actual copying.
392 // For reversed copying, the encoding should be decreased.
393 MCRegister ActualSrcReg = FindRegWithEncoding(
394 RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
395 MCRegister ActualDstReg = FindRegWithEncoding(
396 RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
397
398 auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
399 bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
400 bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
401 if (UseVMV)
402 MIB.addReg(ActualDstReg, RegState::Undef);
403 if (UseVMV_V_I)
404 MIB = MIB.add(DefMBBI->getOperand(2));
405 else
406 MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
407 if (UseVMV) {
408 const MCInstrDesc &Desc = DefMBBI->getDesc();
409 MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
410 MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
411 MIB.addImm(0); // tu, mu
412 MIB.addReg(RISCV::VL, RegState::Implicit);
413 MIB.addReg(RISCV::VTYPE, RegState::Implicit);
414 }
415
416 // If we are copying in reverse, decrement the encodings.
417 SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
418 DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
419 I += NumCopied;
420 }
421}
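// Worked example (illustrative encodings): copying a VRN2M4 tuple (NF = 2,
// LMUL = 4, NumRegs = 8) from v8_v12 to v12_v16 overlaps, so the copy is
// reversed. GetCopyInfo then picks two LMUL_4 copies:
//   $v16m4 = VMV4R_V $v12m4
//   $v12m4 = VMV4R_V $v8m4
// copying the upper half first so the overlapping registers are read before
// they are clobbered.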
422
425 const DebugLoc &DL, MCRegister DstReg,
426 MCRegister SrcReg, bool KillSrc) const {
428
429 if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
430 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
431 .addReg(SrcReg, getKillRegState(KillSrc))
432 .addImm(0);
433 return;
434 }
435
436 if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
437 // Emit an ADDI for both parts of GPRPair.
438 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
439 TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
440 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
441 getKillRegState(KillSrc))
442 .addImm(0);
443 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
444 TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
445 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
446 getKillRegState(KillSrc))
447 .addImm(0);
448 return;
449 }
450
451 // Handle copies from a CSR to a GPR.
452 if (RISCV::VCSRRegClass.contains(SrcReg) &&
453 RISCV::GPRRegClass.contains(DstReg)) {
454 BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
455 .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
456 .addReg(RISCV::X0);
457 return;
458 }
459
460 if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
461 unsigned Opc;
462 if (STI.hasStdExtZfh()) {
463 Opc = RISCV::FSGNJ_H;
464 } else {
465 assert(STI.hasStdExtF() &&
466 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
467 "Unexpected extensions");
468 // Zfhmin/Zfbfmin don't have FSGNJ_H, so replace FSGNJ_H with FSGNJ_S.
469 DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
470 &RISCV::FPR32RegClass);
471 SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
472 &RISCV::FPR32RegClass);
473 Opc = RISCV::FSGNJ_S;
474 }
475 BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
476 .addReg(SrcReg, getKillRegState(KillSrc))
477 .addReg(SrcReg, getKillRegState(KillSrc));
478 return;
479 }
480
481 if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
482 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
483 .addReg(SrcReg, getKillRegState(KillSrc))
484 .addReg(SrcReg, getKillRegState(KillSrc));
485 return;
486 }
487
488 if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
489 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
490 .addReg(SrcReg, getKillRegState(KillSrc))
491 .addReg(SrcReg, getKillRegState(KillSrc));
492 return;
493 }
494
495 if (RISCV::FPR32RegClass.contains(DstReg) &&
496 RISCV::GPRRegClass.contains(SrcReg)) {
497 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
498 .addReg(SrcReg, getKillRegState(KillSrc));
499 return;
500 }
501
502 if (RISCV::GPRRegClass.contains(DstReg) &&
503 RISCV::FPR32RegClass.contains(SrcReg)) {
504 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
505 .addReg(SrcReg, getKillRegState(KillSrc));
506 return;
507 }
508
509 if (RISCV::FPR64RegClass.contains(DstReg) &&
510 RISCV::GPRRegClass.contains(SrcReg)) {
511 assert(STI.getXLen() == 64 && "Unexpected GPR size");
512 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
513 .addReg(SrcReg, getKillRegState(KillSrc));
514 return;
515 }
516
517 if (RISCV::GPRRegClass.contains(DstReg) &&
518 RISCV::FPR64RegClass.contains(SrcReg)) {
519 assert(STI.getXLen() == 64 && "Unexpected GPR size");
520 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
521 .addReg(SrcReg, getKillRegState(KillSrc));
522 return;
523 }
524
525 // VR->VR copies.
526 static const TargetRegisterClass *RVVRegClasses[] = {
527 &RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
528 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
529 &RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
530 &RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
531 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
532 for (const auto &RegClass : RVVRegClasses) {
533 if (RegClass->contains(DstReg, SrcReg)) {
534 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
535 return;
536 }
537 }
538
539 llvm_unreachable("Impossible reg-to-reg copy");
540}
541
544 Register SrcReg, bool IsKill, int FI,
545 const TargetRegisterClass *RC,
546 const TargetRegisterInfo *TRI,
547 Register VReg) const {
549 MachineFrameInfo &MFI = MF->getFrameInfo();
550
551 unsigned Opcode;
552 bool IsScalableVector = true;
553 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
554 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
555 RISCV::SW : RISCV::SD;
556 IsScalableVector = false;
557 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
558 Opcode = RISCV::PseudoRV32ZdinxSD;
559 IsScalableVector = false;
560 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
561 Opcode = RISCV::FSH;
562 IsScalableVector = false;
563 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
564 Opcode = RISCV::FSW;
565 IsScalableVector = false;
566 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
567 Opcode = RISCV::FSD;
568 IsScalableVector = false;
569 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
570 Opcode = RISCV::VS1R_V;
571 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
572 Opcode = RISCV::VS2R_V;
573 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
574 Opcode = RISCV::VS4R_V;
575 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
576 Opcode = RISCV::VS8R_V;
577 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
578 Opcode = RISCV::PseudoVSPILL2_M1;
579 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
580 Opcode = RISCV::PseudoVSPILL2_M2;
581 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
582 Opcode = RISCV::PseudoVSPILL2_M4;
583 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
584 Opcode = RISCV::PseudoVSPILL3_M1;
585 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
586 Opcode = RISCV::PseudoVSPILL3_M2;
587 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
588 Opcode = RISCV::PseudoVSPILL4_M1;
589 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
590 Opcode = RISCV::PseudoVSPILL4_M2;
591 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
592 Opcode = RISCV::PseudoVSPILL5_M1;
593 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
594 Opcode = RISCV::PseudoVSPILL6_M1;
595 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
596 Opcode = RISCV::PseudoVSPILL7_M1;
597 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
598 Opcode = RISCV::PseudoVSPILL8_M1;
599 else
600 llvm_unreachable("Can't store this register to stack slot");
601
602 if (IsScalableVector) {
606
608 BuildMI(MBB, I, DebugLoc(), get(Opcode))
609 .addReg(SrcReg, getKillRegState(IsKill))
610 .addFrameIndex(FI)
611 .addMemOperand(MMO);
612 } else {
615 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
616
617 BuildMI(MBB, I, DebugLoc(), get(Opcode))
618 .addReg(SrcReg, getKillRegState(IsKill))
619 .addFrameIndex(FI)
620 .addImm(0)
621 .addMemOperand(MMO);
622 }
623}
624
627 Register DstReg, int FI,
628 const TargetRegisterClass *RC,
629 const TargetRegisterInfo *TRI,
630 Register VReg) const {
632 MachineFrameInfo &MFI = MF->getFrameInfo();
633
634 unsigned Opcode;
635 bool IsScalableVector = true;
636 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
637 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
638 RISCV::LW : RISCV::LD;
639 IsScalableVector = false;
640 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
641 Opcode = RISCV::PseudoRV32ZdinxLD;
642 IsScalableVector = false;
643 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
644 Opcode = RISCV::FLH;
645 IsScalableVector = false;
646 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
647 Opcode = RISCV::FLW;
648 IsScalableVector = false;
649 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
650 Opcode = RISCV::FLD;
651 IsScalableVector = false;
652 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
653 Opcode = RISCV::VL1RE8_V;
654 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
655 Opcode = RISCV::VL2RE8_V;
656 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
657 Opcode = RISCV::VL4RE8_V;
658 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
659 Opcode = RISCV::VL8RE8_V;
660 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
661 Opcode = RISCV::PseudoVRELOAD2_M1;
662 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
663 Opcode = RISCV::PseudoVRELOAD2_M2;
664 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
665 Opcode = RISCV::PseudoVRELOAD2_M4;
666 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
667 Opcode = RISCV::PseudoVRELOAD3_M1;
668 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
669 Opcode = RISCV::PseudoVRELOAD3_M2;
670 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
671 Opcode = RISCV::PseudoVRELOAD4_M1;
672 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
673 Opcode = RISCV::PseudoVRELOAD4_M2;
674 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
675 Opcode = RISCV::PseudoVRELOAD5_M1;
676 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
677 Opcode = RISCV::PseudoVRELOAD6_M1;
678 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
679 Opcode = RISCV::PseudoVRELOAD7_M1;
680 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
681 Opcode = RISCV::PseudoVRELOAD8_M1;
682 else
683 llvm_unreachable("Can't load this register from stack slot");
684
685 if (IsScalableVector) {
689
691 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
692 .addFrameIndex(FI)
693 .addMemOperand(MMO);
694 } else {
697 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
698
699 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
700 .addFrameIndex(FI)
701 .addImm(0)
702 .addMemOperand(MMO);
703 }
704}
705
708 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
709 VirtRegMap *VRM) const {
710 const MachineFrameInfo &MFI = MF.getFrameInfo();
711
712 // The below optimizations narrow the load so they are only valid for little
713 // endian.
714 // TODO: Support big endian by adding an offset into the frame object?
715 if (MF.getDataLayout().isBigEndian())
716 return nullptr;
717
718 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
719 if (Ops.size() != 1 || Ops[0] != 1)
720 return nullptr;
721
722 unsigned LoadOpc;
723 switch (MI.getOpcode()) {
724 default:
725 if (RISCV::isSEXT_W(MI)) {
726 LoadOpc = RISCV::LW;
727 break;
728 }
729 if (RISCV::isZEXT_W(MI)) {
730 LoadOpc = RISCV::LWU;
731 break;
732 }
733 if (RISCV::isZEXT_B(MI)) {
734 LoadOpc = RISCV::LBU;
735 break;
736 }
737 return nullptr;
738 case RISCV::SEXT_H:
739 LoadOpc = RISCV::LH;
740 break;
741 case RISCV::SEXT_B:
742 LoadOpc = RISCV::LB;
743 break;
744 case RISCV::ZEXT_H_RV32:
745 case RISCV::ZEXT_H_RV64:
746 LoadOpc = RISCV::LHU;
747 break;
748 }
749
751 MachinePointerInfo::getFixedStack(MF, FrameIndex),
753 MFI.getObjectAlign(FrameIndex));
754
755 Register DstReg = MI.getOperand(0).getReg();
756 return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
757 DstReg)
758 .addFrameIndex(FrameIndex)
759 .addImm(0)
760 .addMemOperand(MMO);
761}
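// For example, when operand 1 of `%2 = SEXT_B %1` would otherwise be reloaded
// from %stack.0, the sign-extend is folded into the reload and re-emitted as
// `%2 = LB %stack.0, 0`, so a single extending load replaces the separate
// reload and extend.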
762
765 const DebugLoc &DL, Register DstReg, uint64_t Val,
766 MachineInstr::MIFlag Flag, bool DstRenamable,
767 bool DstIsDead) const {
768 Register SrcReg = RISCV::X0;
769
770 // For RV32, allow a signed or unsigned 32-bit value.
771 if (!STI.is64Bit() && !isInt<32>(Val)) {
772 // If we have a uimm32, it will still fit in a register, so we can allow it.
773 if (!isUInt<32>(Val))
774 report_fatal_error("Should only materialize 32-bit constants for RV32");
775
776 // Sign extend for generateInstSeq.
777 Val = SignExtend64<32>(Val);
778 }
779
781 assert(!Seq.empty());
782
783 bool SrcRenamable = false;
784 unsigned Num = 0;
785
786 for (const RISCVMatInt::Inst &Inst : Seq) {
787 bool LastItem = ++Num == Seq.size();
788 unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
789 getRenamableRegState(DstRenamable);
790 unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
791 getRenamableRegState(SrcRenamable);
792 switch (Inst.getOpndKind()) {
793 case RISCVMatInt::Imm:
794 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
795 .addReg(DstReg, RegState::Define | DstRegState)
796 .addImm(Inst.getImm())
797 .setMIFlag(Flag);
798 break;
800 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
801 .addReg(DstReg, RegState::Define | DstRegState)
802 .addReg(SrcReg, SrcRegState)
803 .addReg(RISCV::X0)
804 .setMIFlag(Flag);
805 break;
807 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
808 .addReg(DstReg, RegState::Define | DstRegState)
809 .addReg(SrcReg, SrcRegState)
810 .addReg(SrcReg, SrcRegState)
811 .setMIFlag(Flag);
812 break;
814 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
815 .addReg(DstReg, RegState::Define | DstRegState)
816 .addReg(SrcReg, SrcRegState)
817 .addImm(Inst.getImm())
818 .setMIFlag(Flag);
819 break;
820 }
821
822 // Only the first instruction has X0 as its source.
823 SrcReg = DstReg;
824 SrcRenamable = DstRenamable;
825 }
826}
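// For example, movImm of 0x1234 typically expands (per the RISCVMatInt
// sequence) to a LUI of 0x1 followed by an ADDI (ADDIW on RV64) of 0x234.
// Only the first instruction of the sequence can use X0 as its source; later
// instructions use the partially materialized value in DstReg.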
827
828static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
829 switch (Opc) {
830 default:
831 return RISCVCC::COND_INVALID;
832 case RISCV::BEQ:
833 return RISCVCC::COND_EQ;
834 case RISCV::BNE:
835 return RISCVCC::COND_NE;
836 case RISCV::BLT:
837 return RISCVCC::COND_LT;
838 case RISCV::BGE:
839 return RISCVCC::COND_GE;
840 case RISCV::BLTU:
841 return RISCVCC::COND_LTU;
842 case RISCV::BGEU:
843 return RISCVCC::COND_GEU;
844 }
845}
846
847// The contents of values added to Cond are not examined outside of
848// RISCVInstrInfo, giving us flexibility in what to push to it. For RISC-V,
849// we push the condition code, Reg1, and Reg2.
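// For example, `BLT $x10, $x11, %bb.2` is recorded as Target = %bb.2 and
// Cond = {COND_LT, $x10, $x11}; insertBranch later rebuilds a BLT from this
// via getBrCond.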
852 // Block ends with fall-through condbranch.
853 assert(LastInst.getDesc().isConditionalBranch() &&
854 "Unknown conditional branch");
855 Target = LastInst.getOperand(2).getMBB();
856 unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
857 Cond.push_back(MachineOperand::CreateImm(CC));
858 Cond.push_back(LastInst.getOperand(0));
859 Cond.push_back(LastInst.getOperand(1));
860}
861
862unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
863 switch (CC) {
864 default:
865 llvm_unreachable("Unknown condition code!");
866 case RISCVCC::COND_EQ:
867 return RISCV::BEQ;
868 case RISCVCC::COND_NE:
869 return RISCV::BNE;
870 case RISCVCC::COND_LT:
871 return RISCV::BLT;
872 case RISCVCC::COND_GE:
873 return RISCV::BGE;
874 case RISCVCC::COND_LTU:
875 return RISCV::BLTU;
876 case RISCVCC::COND_GEU:
877 return RISCV::BGEU;
878 }
879}
880
882 return get(RISCVCC::getBrCond(CC));
883}
884
885RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
886 switch (CC) {
887 default:
888 llvm_unreachable("Unrecognized conditional branch");
889 case RISCVCC::COND_EQ:
890 return RISCVCC::COND_NE;
891 case RISCVCC::COND_NE:
892 return RISCVCC::COND_EQ;
893 case RISCVCC::COND_LT:
894 return RISCVCC::COND_GE;
895 case RISCVCC::COND_GE:
896 return RISCVCC::COND_LT;
897 case RISCVCC::COND_LTU:
898 return RISCVCC::COND_GEU;
899 case RISCVCC::COND_GEU:
900 return RISCVCC::COND_LTU;
901 }
902}
903
906 MachineBasicBlock *&FBB,
908 bool AllowModify) const {
909 TBB = FBB = nullptr;
910 Cond.clear();
911
912 // If the block has no terminators, it just falls into the block after it.
913 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
914 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
915 return false;
916
917 // Count the number of terminators and find the first unconditional or
918 // indirect branch.
919 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
920 int NumTerminators = 0;
921 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
922 J++) {
923 NumTerminators++;
924 if (J->getDesc().isUnconditionalBranch() ||
925 J->getDesc().isIndirectBranch()) {
926 FirstUncondOrIndirectBr = J.getReverse();
927 }
928 }
929
930 // If AllowModify is true, we can erase any terminators after
931 // FirstUncondOrIndirectBr.
932 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
933 while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
934 std::next(FirstUncondOrIndirectBr)->eraseFromParent();
935 NumTerminators--;
936 }
937 I = FirstUncondOrIndirectBr;
938 }
939
940 // We can't handle blocks that end in an indirect branch.
941 if (I->getDesc().isIndirectBranch())
942 return true;
943
944 // We can't handle Generic branch opcodes from Global ISel.
945 if (I->isPreISelOpcode())
946 return true;
947
948 // We can't handle blocks with more than 2 terminators.
949 if (NumTerminators > 2)
950 return true;
951
952 // Handle a single unconditional branch.
953 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
954 TBB = getBranchDestBlock(*I);
955 return false;
956 }
957
958 // Handle a single conditional branch.
959 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
960 parseCondBranch(*I, TBB, Cond);
961 return false;
962 }
963
964 // Handle a conditional branch followed by an unconditional branch.
965 if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
966 I->getDesc().isUnconditionalBranch()) {
967 parseCondBranch(*std::prev(I), TBB, Cond);
968 FBB = getBranchDestBlock(*I);
969 return false;
970 }
971
972 // Otherwise, we can't handle this.
973 return true;
974}
975
977 int *BytesRemoved) const {
978 if (BytesRemoved)
979 *BytesRemoved = 0;
980 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
981 if (I == MBB.end())
982 return 0;
983
984 if (!I->getDesc().isUnconditionalBranch() &&
985 !I->getDesc().isConditionalBranch())
986 return 0;
987
988 // Remove the branch.
989 if (BytesRemoved)
990 *BytesRemoved += getInstSizeInBytes(*I);
991 I->eraseFromParent();
992
993 I = MBB.end();
994
995 if (I == MBB.begin())
996 return 1;
997 --I;
998 if (!I->getDesc().isConditionalBranch())
999 return 1;
1000
1001 // Remove the branch.
1002 if (BytesRemoved)
1003 *BytesRemoved += getInstSizeInBytes(*I);
1004 I->eraseFromParent();
1005 return 2;
1006}
1007
1008// Inserts a branch into the end of the specified MachineBasicBlock, returning
1009// the number of instructions inserted.
1012 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
1013 if (BytesAdded)
1014 *BytesAdded = 0;
1015
1016 // Shouldn't be a fall through.
1017 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1018 assert((Cond.size() == 3 || Cond.size() == 0) &&
1019 "RISC-V branch conditions have two components!");
1020
1021 // Unconditional branch.
1022 if (Cond.empty()) {
1023 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
1024 if (BytesAdded)
1025 *BytesAdded += getInstSizeInBytes(MI);
1026 return 1;
1027 }
1028
1029 // Either a one or two-way conditional branch.
1030 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1031 MachineInstr &CondMI =
1032 *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
1033 if (BytesAdded)
1034 *BytesAdded += getInstSizeInBytes(CondMI);
1035
1036 // One-way conditional branch.
1037 if (!FBB)
1038 return 1;
1039
1040 // Two-way conditional branch.
1041 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
1042 if (BytesAdded)
1043 *BytesAdded += getInstSizeInBytes(MI);
1044 return 2;
1045}
1046
1048 MachineBasicBlock &DestBB,
1049 MachineBasicBlock &RestoreBB,
1050 const DebugLoc &DL, int64_t BrOffset,
1051 RegScavenger *RS) const {
1052 assert(RS && "RegScavenger required for long branching");
1053 assert(MBB.empty() &&
1054 "new block should be inserted for expanding unconditional branch");
1055 assert(MBB.pred_size() == 1);
1056 assert(RestoreBB.empty() &&
1057 "restore block should be inserted for restoring clobbered registers");
1058
1060 MachineFunction *MF = MBB.getParent();
1061 MachineRegisterInfo &MRI = MF->getRegInfo();
1062 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
1063 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
1064 if (!isInt<32>(BrOffset))
1066 "Branch offsets outside of the signed 32-bit range not supported");
1067
1068 // FIXME: A virtual register must be used initially, as the register
1069 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
1070 // uses the same workaround).
1071 Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
1072 auto II = MBB.end();
1073 // We may also update the jump target to RestoreBB later.
1074 MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
1075 .addReg(ScratchReg, RegState::Define | RegState::Dead)
1076 .addMBB(&DestBB, RISCVII::MO_CALL);
1077
1079 Register TmpGPR =
1080 RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
1081 /*RestoreAfter=*/false, /*SpAdj=*/0,
1082 /*AllowSpill=*/false);
1083 if (TmpGPR != RISCV::NoRegister)
1084 RS->setRegUsed(TmpGPR);
1085 else {
1086 // The case when there is no scavenged register needs special handling.
1087
1088 // Pick s11 because it doesn't make a difference.
1089 TmpGPR = RISCV::X27;
1090
1091 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
1092 if (FrameIndex == -1)
1093 report_fatal_error("underestimated function size");
1094
1095 storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
1096 &RISCV::GPRRegClass, TRI, Register());
1097 TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
1098 /*SpAdj=*/0, /*FIOperandNum=*/1);
1099
1100 MI.getOperand(1).setMBB(&RestoreBB);
1101
1102 loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
1103 &RISCV::GPRRegClass, TRI, Register());
1104 TRI->eliminateFrameIndex(RestoreBB.back(),
1105 /*SpAdj=*/0, /*FIOperandNum=*/1);
1106 }
1107
1108 MRI.replaceRegWith(ScratchReg, TmpGPR);
1109 MRI.clearVirtRegs();
1110}
1111
1112bool RISCVInstrInfo::reverseBranchCondition(
1113 SmallVectorImpl<MachineOperand> &Cond) const {
1114 assert((Cond.size() == 3) && "Invalid branch condition!");
1115 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1116 Cond[0].setImm(getOppositeBranchCondition(CC));
1117 return false;
1118}
1119
1121 MachineBasicBlock *MBB = MI.getParent();
1122 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1123
1124 MachineBasicBlock *TBB, *FBB;
1125 SmallVector<MachineOperand, 3> Cond;
1126 if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
1127 return false;
1128
1129 RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1130 assert(CC != RISCVCC::COND_INVALID);
1131
1132 if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
1133 return false;
1134
1135 // For two constants C0 and C1 from
1136 // ```
1137 // li Y, C0
1138 // li Z, C1
1139 // ```
1140 // 1. if C1 = C0 + 1
1141 // we can turn:
1142 // (a) blt Y, X -> bge X, Z
1143 // (b) bge Y, X -> blt X, Z
1144 //
1145 // 2. if C1 = C0 - 1
1146 // we can turn:
1147 // (a) blt X, Y -> bge Z, X
1148 // (b) bge X, Y -> blt Z, X
1149 //
1150 // To make sure this optimization is really beneficial, we only
1151 // optimize for cases where Y has only one use (i.e. only used by the branch).
1152
1153 // Right now we only care about LI (i.e. ADDI x0, imm)
1154 auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
1155 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1156 MI->getOperand(1).getReg() == RISCV::X0) {
1157 Imm = MI->getOperand(2).getImm();
1158 return true;
1159 }
1160 return false;
1161 };
1162 // Either a load from immediate instruction or X0.
1163 auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
1164 if (!Op.isReg())
1165 return false;
1166 Register Reg = Op.getReg();
1167 return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
1168 };
1169
1170 MachineOperand &LHS = MI.getOperand(0);
1171 MachineOperand &RHS = MI.getOperand(1);
1172 // Try to find the register for constant Z; return
1173 // invalid register otherwise.
1174 auto searchConst = [&](int64_t C1) -> Register {
1175 MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
1176 auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
1177 int64_t Imm;
1178 return isLoadImm(&I, Imm) && Imm == C1 &&
1179 I.getOperand(0).getReg().isVirtual();
1180 });
1181 if (DefC1 != E)
1182 return DefC1->getOperand(0).getReg();
1183
1184 return Register();
1185 };
1186
1187 bool Modify = false;
1188 int64_t C0;
1189 if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
1190 // Might be case 1.
1191 // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
1192 // to worry about unsigned overflow here)
1193 if (C0 < INT64_MAX)
1194 if (Register RegZ = searchConst(C0 + 1)) {
1195 reverseBranchCondition(Cond);
1196 Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
1197 Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1198 // We might extend the live range of Z, clear its kill flag to
1199 // account for this.
1200 MRI.clearKillFlags(RegZ);
1201 Modify = true;
1202 }
1203 } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
1204 // Might be case 2.
1205 // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
1206 // when C0 is zero.
1207 if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
1208 if (Register RegZ = searchConst(C0 - 1)) {
1209 reverseBranchCondition(Cond);
1210 Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1211 Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
1212 // We might extend the live range of Z, clear its kill flag to
1213 // account for this.
1214 MRI.clearKillFlags(RegZ);
1215 Modify = true;
1216 }
1217 }
1218
1219 if (!Modify)
1220 return false;
1221
1222 // Build the new branch and remove the old one.
1223 BuildMI(*MBB, MI, MI.getDebugLoc(),
1224 getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
1225 .add(Cond[1])
1226 .add(Cond[2])
1227 .addMBB(TBB);
1228 MI.eraseFromParent();
1229
1230 return true;
1231}
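// For example (sketched MIR, case 1 with C0 = 20):
//   %y:gpr = ADDI $x0, 20
//   %z:gpr = ADDI $x0, 21
//   BLT %y, %x, %bb.taken        ; %y has no other use
// is rewritten to
//   BGE %x, %z, %bb.taken
// since (20 < x) is equivalent to (x >= 21) for integer values.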
1232
1235 assert(MI.getDesc().isBranch() && "Unexpected opcode!");
1236 // The branch target is always the last operand.
1237 int NumOp = MI.getNumExplicitOperands();
1238 return MI.getOperand(NumOp - 1).getMBB();
1239}
1240
1242 int64_t BrOffset) const {
1243 unsigned XLen = STI.getXLen();
1244 // Ideally we could determine the supported branch offset from the
1245 // RISCVII::FormMask, but this can't be used for Pseudo instructions like
1246 // PseudoBR.
1247 switch (BranchOp) {
1248 default:
1249 llvm_unreachable("Unexpected opcode!");
1250 case RISCV::BEQ:
1251 case RISCV::BNE:
1252 case RISCV::BLT:
1253 case RISCV::BGE:
1254 case RISCV::BLTU:
1255 case RISCV::BGEU:
1256 return isIntN(13, BrOffset);
1257 case RISCV::JAL:
1258 case RISCV::PseudoBR:
1259 return isIntN(21, BrOffset);
1260 case RISCV::PseudoJump:
1261 return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
1262 }
1263}
1264
1265// If the operation has a predicated pseudo instruction, return the pseudo
1266// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1267// TODO: Support more operations.
1268unsigned getPredicatedOpcode(unsigned Opcode) {
1269 switch (Opcode) {
1270 case RISCV::ADD: return RISCV::PseudoCCADD; break;
1271 case RISCV::SUB: return RISCV::PseudoCCSUB; break;
1272 case RISCV::SLL: return RISCV::PseudoCCSLL; break;
1273 case RISCV::SRL: return RISCV::PseudoCCSRL; break;
1274 case RISCV::SRA: return RISCV::PseudoCCSRA; break;
1275 case RISCV::AND: return RISCV::PseudoCCAND; break;
1276 case RISCV::OR: return RISCV::PseudoCCOR; break;
1277 case RISCV::XOR: return RISCV::PseudoCCXOR; break;
1278
1279 case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
1280 case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
1281 case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
1282 case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
1283 case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
1284 case RISCV::ORI: return RISCV::PseudoCCORI; break;
1285 case RISCV::XORI: return RISCV::PseudoCCXORI; break;
1286
1287 case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
1288 case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
1289 case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
1290 case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
1291 case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
1292
1293 case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
1294 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
1295 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
1296 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
1297
1298 case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
1299 case RISCV::ORN: return RISCV::PseudoCCORN; break;
1300 case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
1301 }
1302
1303 return RISCV::INSTRUCTION_LIST_END;
1304}
1305
1306/// Identify instructions that can be folded into a CCMOV instruction, and
1307/// return the defining instruction.
1309 const MachineRegisterInfo &MRI,
1310 const TargetInstrInfo *TII) {
1311 if (!Reg.isVirtual())
1312 return nullptr;
1313 if (!MRI.hasOneNonDBGUse(Reg))
1314 return nullptr;
1315 MachineInstr *MI = MRI.getVRegDef(Reg);
1316 if (!MI)
1317 return nullptr;
1318 // Check if MI can be predicated and folded into the CCMOV.
1319 if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
1320 return nullptr;
1321 // Don't predicate li idiom.
1322 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1323 MI->getOperand(1).getReg() == RISCV::X0)
1324 return nullptr;
1325 // Check if MI has any other defs or physreg uses.
1326 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
1327 // Reject frame index operands, PEI can't handle the predicated pseudos.
1328 if (MO.isFI() || MO.isCPI() || MO.isJTI())
1329 return nullptr;
1330 if (!MO.isReg())
1331 continue;
1332 // MI can't have any tied operands, that would conflict with predication.
1333 if (MO.isTied())
1334 return nullptr;
1335 if (MO.isDef())
1336 return nullptr;
1337 // Allow constant physregs.
1338 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
1339 return nullptr;
1340 }
1341 bool DontMoveAcrossStores = true;
1342 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
1343 return nullptr;
1344 return MI;
1345}
1346
1349 unsigned &TrueOp, unsigned &FalseOp,
1350 bool &Optimizable) const {
1351 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1352 "Unknown select instruction");
1353 // CCMOV operands:
1354 // 0: Def.
1355 // 1: LHS of compare.
1356 // 2: RHS of compare.
1357 // 3: Condition code.
1358 // 4: False use.
1359 // 5: True use.
1360 TrueOp = 5;
1361 FalseOp = 4;
1362 Cond.push_back(MI.getOperand(1));
1363 Cond.push_back(MI.getOperand(2));
1364 Cond.push_back(MI.getOperand(3));
1365 // We can only fold when we support short forward branch opt.
1366 Optimizable = STI.hasShortForwardBranchOpt();
1367 return false;
1368}
1369
1373 bool PreferFalse) const {
1374 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1375 "Unknown select instruction");
1376 if (!STI.hasShortForwardBranchOpt())
1377 return nullptr;
1378
1379 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1380 MachineInstr *DefMI =
1381 canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
1382 bool Invert = !DefMI;
1383 if (!DefMI)
1384 DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
1385 if (!DefMI)
1386 return nullptr;
1387
1388 // Find new register class to use.
1389 MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
1390 Register DestReg = MI.getOperand(0).getReg();
1391 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
1392 if (!MRI.constrainRegClass(DestReg, PreviousClass))
1393 return nullptr;
1394
1395 unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
1396 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
1397
1398 // Create a new predicated version of DefMI.
1399 MachineInstrBuilder NewMI =
1400 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
1401
1402 // Copy the condition portion.
1403 NewMI.add(MI.getOperand(1));
1404 NewMI.add(MI.getOperand(2));
1405
1406 // Add condition code, inverting if necessary.
1407 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
1408 if (Invert)
1409 CC = RISCVCC::getOppositeBranchCondition(CC);
1410 NewMI.addImm(CC);
1411
1412 // Copy the false register.
1413 NewMI.add(FalseReg);
1414
1415 // Copy all the DefMI operands.
1416 const MCInstrDesc &DefDesc = DefMI->getDesc();
1417 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
1418 NewMI.add(DefMI->getOperand(i));
1419
1420 // Update SeenMIs set: register newly created MI and erase removed DefMI.
1421 SeenMIs.insert(NewMI);
1422 SeenMIs.erase(DefMI);
1423
1424 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
1425 // DefMI would be invalid when transferred inside the loop. Checking for a
1426 // loop is expensive, but at least remove kill flags if they are in different
1427 // BBs.
1428 if (DefMI->getParent() != MI.getParent())
1429 NewMI->clearKillInfo();
1430
1431 // The caller will erase MI, but not DefMI.
1432 DefMI->eraseFromParent();
1433 return NewMI;
1434}
1435
1437 if (MI.isMetaInstruction())
1438 return 0;
1439
1440 unsigned Opcode = MI.getOpcode();
1441
1442 if (Opcode == TargetOpcode::INLINEASM ||
1443 Opcode == TargetOpcode::INLINEASM_BR) {
1444 const MachineFunction &MF = *MI.getParent()->getParent();
1445 const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
1446 return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
1447 *TM.getMCAsmInfo());
1448 }
1449
1450 if (!MI.memoperands_empty()) {
1451 MachineMemOperand *MMO = *(MI.memoperands_begin());
1452 const MachineFunction &MF = *MI.getParent()->getParent();
1453 const auto &ST = MF.getSubtarget<RISCVSubtarget>();
1454 if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
1455 if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
1456 if (isCompressibleInst(MI, STI))
1457 return 4; // c.ntl.all + c.load/c.store
1458 return 6; // c.ntl.all + load/store
1459 }
1460 return 8; // ntl.all + load/store
1461 }
1462 }
1463
1464 if (Opcode == TargetOpcode::BUNDLE)
1465 return getInstBundleLength(MI);
1466
1467 if (MI.getParent() && MI.getParent()->getParent()) {
1468 if (isCompressibleInst(MI, STI))
1469 return 2;
1470 }
1471
1472 switch (Opcode) {
1473 case TargetOpcode::STACKMAP:
1474 // The upper bound for a stackmap intrinsic is the full length of its shadow
1475 return StackMapOpers(&MI).getNumPatchBytes();
1476 case TargetOpcode::PATCHPOINT:
1477 // The size of the patchpoint intrinsic is the number of bytes requested
1478 return PatchPointOpers(&MI).getNumPatchBytes();
1479 case TargetOpcode::STATEPOINT: {
1480 // The size of the statepoint intrinsic is the number of bytes requested
1481 unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
1482 // No patch bytes means at most a PseudoCall is emitted
1483 return std::max(NumBytes, 8U);
1484 }
1485 default:
1486 return get(Opcode).getSize();
1487 }
1488}
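// For example, a compressible non-temporal store is reported as 4 bytes
// (2-byte c.ntl.all hint + 2-byte compressed store), while an ordinary
// compressible instruction is reported as 2 bytes.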
1489
1490unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
1491 unsigned Size = 0;
1492 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
1493 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
1494 while (++I != E && I->isInsideBundle()) {
1495 assert(!I->isBundle() && "No nested bundle!");
1496 Size += getInstSizeInBytes(*I);
1497 }
1498 return Size;
1499}
1500
1502 const unsigned Opcode = MI.getOpcode();
1503 switch (Opcode) {
1504 default:
1505 break;
1506 case RISCV::FSGNJ_D:
1507 case RISCV::FSGNJ_S:
1508 case RISCV::FSGNJ_H:
1509 case RISCV::FSGNJ_D_INX:
1510 case RISCV::FSGNJ_D_IN32X:
1511 case RISCV::FSGNJ_S_INX:
1512 case RISCV::FSGNJ_H_INX:
1513 // The canonical floating-point move is fsgnj rd, rs, rs.
1514 return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
1515 MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
1516 case RISCV::ADDI:
1517 case RISCV::ORI:
1518 case RISCV::XORI:
1519 return (MI.getOperand(1).isReg() &&
1520 MI.getOperand(1).getReg() == RISCV::X0) ||
1521 (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
1522 }
1523 return MI.isAsCheapAsAMove();
1524}
1525
1526std::optional<DestSourcePair>
1528 if (MI.isMoveReg())
1529 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1530 switch (MI.getOpcode()) {
1531 default:
1532 break;
1533 case RISCV::ADDI:
1534 // Operand 1 can be a frameindex but callers expect registers
1535 if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
1536 MI.getOperand(2).getImm() == 0)
1537 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1538 break;
1539 case RISCV::FSGNJ_D:
1540 case RISCV::FSGNJ_S:
1541 case RISCV::FSGNJ_H:
1542 case RISCV::FSGNJ_D_INX:
1543 case RISCV::FSGNJ_D_IN32X:
1544 case RISCV::FSGNJ_S_INX:
1545 case RISCV::FSGNJ_H_INX:
1546 // The canonical floating-point move is fsgnj rd, rs, rs.
1547 if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
1548 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
1549 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1550 break;
1551 }
1552 return std::nullopt;
1553}
1554
1556 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
1557 // The option is unused. Choose the Local strategy only for in-order
1558 // cores. When the scheduling model is unspecified, use the MinInstrCount
1559 // strategy as the more generic one.
1560 const auto &SchedModel = STI.getSchedModel();
1561 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
1562 ? MachineTraceStrategy::TS_MinInstrCount
1563 : MachineTraceStrategy::TS_Local;
1564 }
1565 // The strategy was forced by the option.
1566 return ForceMachineCombinerStrategy;
1567}
1568
1570 MachineInstr &Root, unsigned &Pattern,
1571 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
1572 int16_t FrmOpIdx =
1573 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
1574 if (FrmOpIdx < 0) {
1575 assert(all_of(InsInstrs,
1576 [](MachineInstr *MI) {
1577 return RISCV::getNamedOperandIdx(MI->getOpcode(),
1578 RISCV::OpName::frm) < 0;
1579 }) &&
1580 "New instructions require FRM whereas the old one does not have it");
1581 return;
1582 }
1583
1584 const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
1585 MachineFunction &MF = *Root.getMF();
1586
1587 for (auto *NewMI : InsInstrs) {
1588 assert(static_cast<unsigned>(RISCV::getNamedOperandIdx(
1589 NewMI->getOpcode(), RISCV::OpName::frm)) ==
1590 NewMI->getNumOperands() &&
1591 "Instruction has unexpected number of operands");
1592 MachineInstrBuilder MIB(MF, NewMI);
1593 MIB.add(FRM);
1594 if (FRM.getImm() == RISCVFPRndMode::DYN)
1595 MIB.addUse(RISCV::FRM, RegState::Implicit);
1596 }
1597}
1598
1599static bool isFADD(unsigned Opc) {
1600 switch (Opc) {
1601 default:
1602 return false;
1603 case RISCV::FADD_H:
1604 case RISCV::FADD_S:
1605 case RISCV::FADD_D:
1606 return true;
1607 }
1608}
1609
1610static bool isFSUB(unsigned Opc) {
1611 switch (Opc) {
1612 default:
1613 return false;
1614 case RISCV::FSUB_H:
1615 case RISCV::FSUB_S:
1616 case RISCV::FSUB_D:
1617 return true;
1618 }
1619}
1620
1621static bool isFMUL(unsigned Opc) {
1622 switch (Opc) {
1623 default:
1624 return false;
1625 case RISCV::FMUL_H:
1626 case RISCV::FMUL_S:
1627 case RISCV::FMUL_D:
1628 return true;
1629 }
1630}
1631
1633 bool &Commuted) const {
1634 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
1635 return false;
1636
1637 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
1638 unsigned OperandIdx = Commuted ? 2 : 1;
1639 const MachineInstr &Sibling =
1640 *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());
1641
1642 int16_t InstFrmOpIdx =
1643 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
1644 int16_t SiblingFrmOpIdx =
1645 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
1646
1647 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
1648 RISCV::hasEqualFRM(Inst, Sibling);
1649}
1650
1652 bool Invert) const {
1653 unsigned Opc = Inst.getOpcode();
1654 if (Invert) {
1655 auto InverseOpcode = getInverseOpcode(Opc);
1656 if (!InverseOpcode)
1657 return false;
1658 Opc = *InverseOpcode;
1659 }
1660
1661 if (isFADD(Opc) || isFMUL(Opc))
1664
1665 switch (Opc) {
1666 default:
1667 return false;
1668 case RISCV::ADD:
1669 case RISCV::ADDW:
1670 case RISCV::AND:
1671 case RISCV::OR:
1672 case RISCV::XOR:
1673 // From the RISC-V ISA spec: if both the high and low bits of the same product
1674 // are required, then the recommended code sequence is:
1675 //
1676 // MULH[[S]U] rdh, rs1, rs2
1677 // MUL rdl, rs1, rs2
1678 // (source register specifiers must be in same order and rdh cannot be the
1679 // same as rs1 or rs2)
1680 //
1681 // Microarchitectures can then fuse these into a single multiply operation
1682 // instead of performing two separate multiplies.
1683 // MachineCombiner may reassociate MUL operands and lose the fusion
1684 // opportunity.
1685 case RISCV::MUL:
1686 case RISCV::MULW:
1687 case RISCV::MIN:
1688 case RISCV::MINU:
1689 case RISCV::MAX:
1690 case RISCV::MAXU:
1691 case RISCV::FMIN_H:
1692 case RISCV::FMIN_S:
1693 case RISCV::FMIN_D:
1694 case RISCV::FMAX_H:
1695 case RISCV::FMAX_S:
1696 case RISCV::FMAX_D:
1697 return true;
1698 }
1699
1700 return false;
1701}
1702
1703std::optional<unsigned>
1704RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
1705 switch (Opcode) {
1706 default:
1707 return std::nullopt;
1708 case RISCV::FADD_H:
1709 return RISCV::FSUB_H;
1710 case RISCV::FADD_S:
1711 return RISCV::FSUB_S;
1712 case RISCV::FADD_D:
1713 return RISCV::FSUB_D;
1714 case RISCV::FSUB_H:
1715 return RISCV::FADD_H;
1716 case RISCV::FSUB_S:
1717 return RISCV::FADD_S;
1718 case RISCV::FSUB_D:
1719 return RISCV::FADD_D;
1720 case RISCV::ADD:
1721 return RISCV::SUB;
1722 case RISCV::SUB:
1723 return RISCV::ADD;
1724 case RISCV::ADDW:
1725 return RISCV::SUBW;
1726 case RISCV::SUBW:
1727 return RISCV::ADDW;
1728 }
1729}
1730
1732 const MachineOperand &MO,
1733 bool DoRegPressureReduce) {
1734 if (!MO.isReg() || !MO.getReg().isVirtual())
1735 return false;
1736 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
1737 MachineInstr *MI = MRI.getVRegDef(MO.getReg());
1738 if (!MI || !isFMUL(MI->getOpcode()))
1739 return false;
1740
1743 return false;
1744
1745 // Try combining even if the fmul has more than one use, as it eliminates
1746 // the dependency between the fadd (fsub) and the fmul. However, it can
1747 // extend live ranges for the fmul operands, so reject the transformation
1748 // in register-pressure-reduction mode.
1749 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1750 return false;
1751
1752 // Do not combine instructions from different basic blocks.
1753 if (Root.getParent() != MI->getParent())
1754 return false;
1755 return RISCV::hasEqualFRM(Root, *MI);
1756}
1757
1759 SmallVectorImpl<unsigned> &Patterns,
1760 bool DoRegPressureReduce) {
1761 unsigned Opc = Root.getOpcode();
1762 bool IsFAdd = isFADD(Opc);
1763 if (!IsFAdd && !isFSUB(Opc))
1764 return false;
1765 bool Added = false;
1766 if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
1767 DoRegPressureReduce)) {
1770 Added = true;
1771 }
1772 if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
1773 DoRegPressureReduce)) {
1776 Added = true;
1777 }
1778 return Added;
1779}
1780
1781static bool getFPPatterns(MachineInstr &Root,
1782 SmallVectorImpl<unsigned> &Patterns,
1783 bool DoRegPressureReduce) {
1784 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
1785}
1786
1787/// Utility routine that checks if \param MO is defined by an
1788/// \param CombineOpc instruction in the basic block \param MBB
1790 const MachineOperand &MO,
1791 unsigned CombineOpc) {
1793 const MachineInstr *MI = nullptr;
1794
1795 if (MO.isReg() && MO.getReg().isVirtual())
1796 MI = MRI.getUniqueVRegDef(MO.getReg());
1797 // And it needs to be in the trace (otherwise, it won't have a depth).
1798 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
1799 return nullptr;
1800 // It must only be used by the instruction we combine with.
1801 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1802 return nullptr;
1803
1804 return MI;
1805}
1806
1807/// Utility routine that checks if \param MO is defined by a SLLI in \param
1808/// MBB that can be combined by splitting across 2 SHXADD instructions. The
1809/// first SHXADD shift amount is given by \param OuterShiftAmt.
1811 const MachineOperand &MO,
1812 unsigned OuterShiftAmt) {
1813 const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
1814 if (!ShiftMI)
1815 return false;
1816
1817 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
1818 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
1819 return false;
1820
1821 return true;
1822}
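// For example, with OuterShiftAmt = 3 (from SH3ADD), a feeding SLLI by 5
// qualifies: 5 >= 3 and 5 - 3 = 2 <= 3, so the inner shift can later be
// folded into a SH2ADD.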
1823
1824// Returns the shift amount from a SHXADD instruction. Returns 0 if the
1825// instruction is not a SHXADD.
1826static unsigned getSHXADDShiftAmount(unsigned Opc) {
1827 switch (Opc) {
1828 default:
1829 return 0;
1830 case RISCV::SH1ADD:
1831 return 1;
1832 case RISCV::SH2ADD:
1833 return 2;
1834 case RISCV::SH3ADD:
1835 return 3;
1836 }
1837}
1838
1839// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
1840// (sh3add (sh2add Y, Z), X).
1841static bool getSHXADDPatterns(const MachineInstr &Root,
1842 SmallVectorImpl<unsigned> &Patterns) {
1843 unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
1844 if (!ShiftAmt)
1845 return false;
1846
1847 const MachineBasicBlock &MBB = *Root.getParent();
1848
1849 const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
1850 if (!AddMI)
1851 return false;
1852
1853 bool Found = false;
1854 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {
1855 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
1856 Found = true;
1857 }
1858 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {
1859 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
1860 Found = true;
1861 }
1862
1863 return Found;
1864}
1865
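// [Annotation, not part of the upstream source] getSHXADDPatterns() above
// fires when the shNadd root's unshifted operand is a single-use ADD in the
// same block, one of whose operands is in turn a single-use SLLI with a shift
// amount in [N, N + 3]. For example, with N = 3 and an inner shift of 5:
//   slli t0, y, 5 ; add t1, x, t0 ; sh3add rd, z, t1
// is a candidate for the rewrite performed by genShXAddAddShift() below.
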
1867 switch (Pattern) {
1873 default:
1875 }
1876}
1877
1878bool RISCVInstrInfo::getMachineCombinerPatterns(
1879 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
1880 bool DoRegPressureReduce) const {
1881
1882 if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
1883 return true;
1884
1885 if (getSHXADDPatterns(Root, Patterns))
1886 return true;
1887
1888 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
1889 DoRegPressureReduce);
1890}
1891
1892static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
1893 switch (RootOpc) {
1894 default:
1895 llvm_unreachable("Unexpected opcode");
1896 case RISCV::FADD_H:
1897 return RISCV::FMADD_H;
1898 case RISCV::FADD_S:
1899 return RISCV::FMADD_S;
1900 case RISCV::FADD_D:
1901 return RISCV::FMADD_D;
1902 case RISCV::FSUB_H:
1903 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
1904 : RISCV::FNMSUB_H;
1905 case RISCV::FSUB_S:
1906 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
1907 : RISCV::FNMSUB_S;
1908 case RISCV::FSUB_D:
1909 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
1910 : RISCV::FNMSUB_D;
1911 }
1912}
1913
1914static unsigned getAddendOperandIdx(unsigned Pattern) {
1915 switch (Pattern) {
1916 default:
1917 llvm_unreachable("Unexpected pattern");
1918 case RISCVMachineCombinerPattern::FMADD_AX:
1919 case RISCVMachineCombinerPattern::FMSUB:
1920 return 2;
1921 case RISCVMachineCombinerPattern::FMADD_XA:
1922 case RISCVMachineCombinerPattern::FNMSUB:
1923 return 1;
1924 }
1925}
1926
1927static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
1928 unsigned Pattern,
1929 SmallVectorImpl<MachineInstr *> &InsInstrs,
1930 SmallVectorImpl<MachineInstr *> &DelInstrs) {
1931 MachineFunction *MF = Root.getMF();
1932 MachineRegisterInfo &MRI = MF->getRegInfo();
1933 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1934
1935 MachineOperand &Mul1 = Prev.getOperand(1);
1936 MachineOperand &Mul2 = Prev.getOperand(2);
1937 MachineOperand &Dst = Root.getOperand(0);
1938 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern));
1939
1940 Register DstReg = Dst.getReg();
1941 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
1942 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
1943 DebugLoc MergedLoc =
1944 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
1945
1946 bool Mul1IsKill = Mul1.isKill();
1947 bool Mul2IsKill = Mul2.isKill();
1948 bool AddendIsKill = Addend.isKill();
1949
1950 // We need to clear kill flags since we may be extending the live range past
1951 // a kill. If the mul had kill flags, we can preserve those since we know
1952 // where the previous range stopped.
1953 MRI.clearKillFlags(Mul1.getReg());
1954 MRI.clearKillFlags(Mul2.getReg());
1955
1956 MachineInstrBuilder MIB =
1957 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
1958 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
1959 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
1960 .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
1961 .setMIFlags(IntersectedFlags);
1962
1963 InsInstrs.push_back(MIB);
1964 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
1965 DelInstrs.push_back(&Prev);
1966 DelInstrs.push_back(&Root);
1967}
1968
1969// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
1970// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
1971// shXadd instructions. The outer shXadd keeps its original opcode.
1972static void
1973genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
1974 SmallVectorImpl<MachineInstr *> &InsInstrs,
1975 SmallVectorImpl<MachineInstr *> &DelInstrs,
1976 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
1977 MachineFunction *MF = Root.getMF();
1978 MachineRegisterInfo &MRI = MF->getRegInfo();
1979 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1980
1981 unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
1982 assert(OuterShiftAmt != 0 && "Unexpected opcode");
1983
1984 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
1985 MachineInstr *ShiftMI =
1986 MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());
1987
1988 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
1989 assert(InnerShiftAmt > OuterShiftAmt && "Unexpected shift amount");
1990
1991 unsigned InnerOpc;
1992 switch (InnerShiftAmt - OuterShiftAmt) {
1993 default:
1994 llvm_unreachable("Unexpected shift amount");
1995 case 0:
1996 InnerOpc = RISCV::ADD;
1997 break;
1998 case 1:
1999 InnerOpc = RISCV::SH1ADD;
2000 break;
2001 case 2:
2002 InnerOpc = RISCV::SH2ADD;
2003 break;
2004 case 3:
2005 InnerOpc = RISCV::SH3ADD;
2006 break;
2007 }
2008
2009 const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx);
2010 const MachineOperand &Y = ShiftMI->getOperand(1);
2011 const MachineOperand &Z = Root.getOperand(1);
2012
2013 Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2014
2015 auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR)
2016 .addReg(Y.getReg(), getKillRegState(Y.isKill()))
2017 .addReg(Z.getReg(), getKillRegState(Z.isKill()));
2018 auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()),
2019 Root.getOperand(0).getReg())
2020 .addReg(NewVR, RegState::Kill)
2021 .addReg(X.getReg(), getKillRegState(X.isKill()));
2022
2023 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2024 InsInstrs.push_back(MIB1);
2025 InsInstrs.push_back(MIB2);
2026 DelInstrs.push_back(ShiftMI);
2027 DelInstrs.push_back(AddMI);
2028 DelInstrs.push_back(&Root);
2029}
2030
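// [Annotation, not part of the upstream source] Why the split above is sound:
// with outer shift N and inner shift I = N + K (0 <= K <= 3), the original
// sequence computes (Z << N) + X + (Y << I). The replacement
//   shKadd NewVR, Y, Z   (plain add when K == 0)   ; NewVR = (Y << K) + Z
//   shNadd rd, NewVR, X                            ; rd = (Y << I) + (Z << N) + X
// yields the same value while shortening the dependence chain rooted at the
// SLLI.
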
2031void RISCVInstrInfo::genAlternativeCodeSequence(
2032 MachineInstr &Root, unsigned Pattern,
2033 SmallVectorImpl<MachineInstr *> &InsInstrs,
2034 SmallVectorImpl<MachineInstr *> &DelInstrs,
2035 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
2036 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2037 switch (Pattern) {
2038 default:
2039 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
2040 DelInstrs, InstrIdxForVirtReg);
2041 return;
2042 case RISCVMachineCombinerPattern::FMADD_AX:
2043 case RISCVMachineCombinerPattern::FMSUB: {
2044 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());
2045 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2046 return;
2047 }
2048 case RISCVMachineCombinerPattern::FMADD_XA:
2049 case RISCVMachineCombinerPattern::FNMSUB: {
2050 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());
2051 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2052 return;
2053 }
2054 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
2055 genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2056 return;
2057 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
2058 genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2059 return;
2060 }
2061}
2062
2064 StringRef &ErrInfo) const {
2065 MCInstrDesc const &Desc = MI.getDesc();
2066
2067 for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
2068 unsigned OpType = Operand.OperandType;
2069 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
2070 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
2071 const MachineOperand &MO = MI.getOperand(Index);
2072 if (MO.isImm()) {
2073 int64_t Imm = MO.getImm();
2074 bool Ok;
2075 switch (OpType) {
2076 default:
2077 llvm_unreachable("Unexpected operand type");
2078
2079 // clang-format off
2080#define CASE_OPERAND_UIMM(NUM) \
2081 case RISCVOp::OPERAND_UIMM##NUM: \
2082 Ok = isUInt<NUM>(Imm); \
2083 break;
2094 // clang-format on
2096 Ok = isShiftedUInt<1, 1>(Imm);
2097 break;
2099 Ok = isShiftedUInt<5, 2>(Imm);
2100 break;
2102 Ok = isShiftedUInt<6, 2>(Imm);
2103 break;
2105 Ok = isShiftedUInt<5, 3>(Imm);
2106 break;
2108 Ok = isUInt<8>(Imm) && Imm >= 32;
2109 break;
2111 Ok = isShiftedUInt<6, 3>(Imm);
2112 break;
2114 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
2115 break;
2117 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);
2118 break;
2120 Ok = Imm == 0;
2121 break;
2123 Ok = isInt<5>(Imm);
2124 break;
2126 Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16;
2127 break;
2129 Ok = isInt<6>(Imm);
2130 break;
2132 Ok = Imm != 0 && isInt<6>(Imm);
2133 break;
2135 Ok = isUInt<10>(Imm);
2136 break;
2138 Ok = isUInt<11>(Imm);
2139 break;
2141 Ok = isInt<12>(Imm);
2142 break;
2144 Ok = isShiftedInt<7, 5>(Imm);
2145 break;
2147 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
2148 break;
2150 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
2151 Ok = Ok && Imm != 0;
2152 break;
2154 Ok = (isUInt<5>(Imm) && Imm != 0) ||
2155 (Imm >= 0xfffe0 && Imm <= 0xfffff);
2156 break;
2158 Ok = Imm >= 0 && Imm <= 10;
2159 break;
2161 Ok = Imm >= 0 && Imm <= 7;
2162 break;
2164 Ok = Imm >= 1 && Imm <= 10;
2165 break;
2167 Ok = Imm >= 2 && Imm <= 14;
2168 break;
2170 Ok = (Imm & 0xf) == 0;
2171 break;
2172 }
2173 if (!Ok) {
2174 ErrInfo = "Invalid immediate";
2175 return false;
2176 }
2177 }
2178 }
2179 }
2180
2181 const uint64_t TSFlags = Desc.TSFlags;
2182 if (RISCVII::hasVLOp(TSFlags)) {
2183 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
2184 if (!Op.isImm() && !Op.isReg()) {
2185 ErrInfo = "Invalid operand type for VL operand";
2186 return false;
2187 }
2188 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
2189 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2190 auto *RC = MRI.getRegClass(Op.getReg());
2191 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
2192 ErrInfo = "Invalid register class for VL operand";
2193 return false;
2194 }
2195 }
2196 if (!RISCVII::hasSEWOp(TSFlags)) {
2197 ErrInfo = "VL operand w/o SEW operand?";
2198 return false;
2199 }
2200 }
2201 if (RISCVII::hasSEWOp(TSFlags)) {
2202 unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
2203 if (!MI.getOperand(OpIdx).isImm()) {
2204 ErrInfo = "SEW value expected to be an immediate";
2205 return false;
2206 }
2207 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
2208 if (Log2SEW > 31) {
2209 ErrInfo = "Unexpected SEW value";
2210 return false;
2211 }
2212 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2213 if (!RISCVVType::isValidSEW(SEW)) {
2214 ErrInfo = "Unexpected SEW value";
2215 return false;
2216 }
2217 }
2218 if (RISCVII::hasVecPolicyOp(TSFlags)) {
2219 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
2220 if (!MI.getOperand(OpIdx).isImm()) {
2221 ErrInfo = "Policy operand expected to be an immediate";
2222 return false;
2223 }
2224 uint64_t Policy = MI.getOperand(OpIdx).getImm();
2226 ErrInfo = "Invalid Policy Value";
2227 return false;
2228 }
2229 if (!RISCVII::hasVLOp(TSFlags)) {
2230 ErrInfo = "policy operand w/o VL operand?";
2231 return false;
2232 }
2233
2234 // VecPolicy operands can only exist on instructions with passthru/merge
2235 // arguments. Note that not all arguments with passthru have vec policy
2236 // operands - some instructions have implicit policies.
2237 unsigned UseOpIdx;
2238 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
2239 ErrInfo = "policy operand w/o tied operand?";
2240 return false;
2241 }
2242 }
2243
2244 return true;
2245}
2246
2248 const MachineInstr &AddrI,
2249 ExtAddrMode &AM) const {
2250 switch (MemI.getOpcode()) {
2251 default:
2252 return false;
2253 case RISCV::LB:
2254 case RISCV::LBU:
2255 case RISCV::LH:
2256 case RISCV::LHU:
2257 case RISCV::LW:
2258 case RISCV::LWU:
2259 case RISCV::LD:
2260 case RISCV::FLH:
2261 case RISCV::FLW:
2262 case RISCV::FLD:
2263 case RISCV::SB:
2264 case RISCV::SH:
2265 case RISCV::SW:
2266 case RISCV::SD:
2267 case RISCV::FSH:
2268 case RISCV::FSW:
2269 case RISCV::FSD:
2270 break;
2271 }
2272
2273 if (MemI.getOperand(0).getReg() == Reg)
2274 return false;
2275
2276 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
2277 !AddrI.getOperand(2).isImm())
2278 return false;
2279
2280 int64_t OldOffset = MemI.getOperand(2).getImm();
2281 int64_t Disp = AddrI.getOperand(2).getImm();
2282 int64_t NewOffset = OldOffset + Disp;
2283 if (!STI.is64Bit())
2284 NewOffset = SignExtend64<32>(NewOffset);
2285
2286 if (!isInt<12>(NewOffset))
2287 return false;
2288
2289 AM.BaseReg = AddrI.getOperand(1).getReg();
2290 AM.ScaledReg = 0;
2291 AM.Scale = 0;
2292 AM.Displacement = NewOffset;
2294 return true;
2295}
2296
2298 const ExtAddrMode &AM) const {
2299
2300 const DebugLoc &DL = MemI.getDebugLoc();
2301 MachineBasicBlock &MBB = *MemI.getParent();
2302
2303 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
2304 "Addressing mode not supported for folding");
2305
2306 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
2307 .addReg(MemI.getOperand(0).getReg(),
2308 MemI.mayLoad() ? RegState::Define : 0)
2309 .addReg(AM.BaseReg)
2310 .addImm(AM.Displacement)
2311 .setMemRefs(MemI.memoperands())
2312 .setMIFlags(MemI.getFlags());
2313}
2314
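// [Annotation, not part of the upstream source] Together, canFoldIntoAddrMode()
// and emitLdStWithAddr() let the generic sink-and-fold machinery merge an ADDI
// into the memory displacement, e.g.
//   addi a1, a0, 32
//   lw   a2, 4(a1)
// can become
//   lw   a2, 36(a0)
// provided the combined displacement still fits the signed 12-bit offset field
// and the memory instruction does not itself define the address register.
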
2315bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
2316 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2317 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2318 const TargetRegisterInfo *TRI) const {
2319 if (!LdSt.mayLoadOrStore())
2320 return false;
2321
2322 // Conservatively, only handle scalar loads/stores for now.
2323 switch (LdSt.getOpcode()) {
2324 case RISCV::LB:
2325 case RISCV::LBU:
2326 case RISCV::SB:
2327 case RISCV::LH:
2328 case RISCV::LHU:
2329 case RISCV::FLH:
2330 case RISCV::SH:
2331 case RISCV::FSH:
2332 case RISCV::LW:
2333 case RISCV::LWU:
2334 case RISCV::FLW:
2335 case RISCV::SW:
2336 case RISCV::FSW:
2337 case RISCV::LD:
2338 case RISCV::FLD:
2339 case RISCV::SD:
2340 case RISCV::FSD:
2341 break;
2342 default:
2343 return false;
2344 }
2345 const MachineOperand *BaseOp;
2346 OffsetIsScalable = false;
2347 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2348 return false;
2349 BaseOps.push_back(BaseOp);
2350 return true;
2351}
2352
2353// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
2354// helper?
2355static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
2356 ArrayRef<const MachineOperand *> BaseOps1,
2357 const MachineInstr &MI2,
2358 ArrayRef<const MachineOperand *> BaseOps2) {
2359 // Only examine the first "base" operand of each instruction, on the
2360 // assumption that it represents the real base address of the memory access.
2361 // Other operands are typically offsets or indices from this base address.
2362 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
2363 return true;
2364
2365 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
2366 return false;
2367
2368 auto MO1 = *MI1.memoperands_begin();
2369 auto MO2 = *MI2.memoperands_begin();
2370 if (MO1->getAddrSpace() != MO2->getAddrSpace())
2371 return false;
2372
2373 auto Base1 = MO1->getValue();
2374 auto Base2 = MO2->getValue();
2375 if (!Base1 || !Base2)
2376 return false;
2377 Base1 = getUnderlyingObject(Base1);
2378 Base2 = getUnderlyingObject(Base2);
2379
2380 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
2381 return false;
2382
2383 return Base1 == Base2;
2384}
2385
2386bool RISCVInstrInfo::shouldClusterMemOps(
2387 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
2388 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2389 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
2390 unsigned NumBytes) const {
2391 // If the mem ops (to be clustered) do not have the same base ptr, then they
2392 // should not be clustered
2393 if (!BaseOps1.empty() && !BaseOps2.empty()) {
2394 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
2395 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
2396 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
2397 return false;
2398 } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
2399 // If only one base op is empty, they do not have the same base ptr
2400 return false;
2401 }
2402
2403 unsigned CacheLineSize =
2404 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
2405 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
2406 CacheLineSize = CacheLineSize ? CacheLineSize : 64;
2407 // Cluster if the memory operations are on the same or a neighbouring cache
2408 // line, but limit the maximum ClusterSize to avoid creating too much
2409 // additional register pressure.
2410 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize;
2411}
2412
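// [Annotation, not part of the upstream source] Clustering example for the
// heuristic above, assuming the default 64-byte cache line: four loads off the
// same base at offsets 0, 8, 16 and 24 may be clustered (all offsets within one
// line of each other), a pair at offsets 0 and 200 may not, and a fifth
// candidate is rejected by the ClusterSize <= 4 cap regardless of offsets.
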
2413// Set BaseReg (the base register operand), Offset (the byte offset being
2414// accessed) and the access Width of the passed instruction that reads/writes
2415// memory. Returns false if the instruction does not read/write memory or the
2416// BaseReg/Offset/Width can't be determined. Is not guaranteed to always
2417// recognise base operands and offsets in all cases.
2418// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
2419// function) and set it as appropriate.
2420bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
2421 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
2422 LocationSize &Width, const TargetRegisterInfo *TRI) const {
2423 if (!LdSt.mayLoadOrStore())
2424 return false;
2425
2426 // Here we assume the standard RISC-V ISA, which uses a base+offset
2427 // addressing mode. You'll need to relax these conditions to support custom
2428 // load/store instructions.
2429 if (LdSt.getNumExplicitOperands() != 3)
2430 return false;
2431 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2432 !LdSt.getOperand(2).isImm())
2433 return false;
2434
2435 if (!LdSt.hasOneMemOperand())
2436 return false;
2437
2438 Width = (*LdSt.memoperands_begin())->getSize();
2439 BaseReg = &LdSt.getOperand(1);
2440 Offset = LdSt.getOperand(2).getImm();
2441 return true;
2442}
2443
2444bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
2445 const MachineInstr &MIa, const MachineInstr &MIb) const {
2446 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
2447 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
2448
2451 return false;
2452
2453 // Retrieve the base register, offset from the base register and width. Width
2454 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
2455 // base registers are identical, and the offset of a lower memory access +
2456 // the width doesn't overlap the offset of a higher memory access,
2457 // then the memory accesses are different.
2458 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2459 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
2460 int64_t OffsetA = 0, OffsetB = 0;
2461 LocationSize WidthA = 0, WidthB = 0;
2462 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
2463 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
2464 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
2465 int LowOffset = std::min(OffsetA, OffsetB);
2466 int HighOffset = std::max(OffsetA, OffsetB);
2467 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2468 if (LowWidth.hasValue() &&
2469 LowOffset + (int)LowWidth.getValue() <= HighOffset)
2470 return true;
2471 }
2472 }
2473 return false;
2474}
2475
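// [Annotation, not part of the upstream source] Disjointness example for the
// check above: with the same base register, an sw at offset 0 (width 4) and an
// lw at offset 4 cannot overlap because 0 + 4 <= 4, so the function returns
// true; an sd at offset 0 (width 8) against an lw at offset 4 would overlap,
// so the conservative answer false is kept.
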
2476std::pair<unsigned, unsigned>
2478 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
2479 return std::make_pair(TF & Mask, TF & ~Mask);
2480}
2481
2484 using namespace RISCVII;
2485 static const std::pair<unsigned, const char *> TargetFlags[] = {
2486 {MO_CALL, "riscv-call"},
2487 {MO_LO, "riscv-lo"},
2488 {MO_HI, "riscv-hi"},
2489 {MO_PCREL_LO, "riscv-pcrel-lo"},
2490 {MO_PCREL_HI, "riscv-pcrel-hi"},
2491 {MO_GOT_HI, "riscv-got-hi"},
2492 {MO_TPREL_LO, "riscv-tprel-lo"},
2493 {MO_TPREL_HI, "riscv-tprel-hi"},
2494 {MO_TPREL_ADD, "riscv-tprel-add"},
2495 {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
2496 {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
2497 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
2498 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
2499 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
2500 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
2501 return ArrayRef(TargetFlags);
2502}
2504 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
2505 const Function &F = MF.getFunction();
2506
2507 // Can F be deduplicated by the linker? If it can, don't outline from it.
2508 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
2509 return false;
2510
2511 // Don't outline from functions with section markings; the program could
2512 // expect that all the code is in the named section.
2513 if (F.hasSection())
2514 return false;
2515
2516 // It's safe to outline from MF.
2517 return true;
2518}
2519
2521 unsigned &Flags) const {
2522 // More accurate safety checking is done in getOutliningCandidateInfo.
2524}
2525
2526// Enum values indicating how an outlined call should be constructed.
2530
2532 MachineFunction &MF) const {
2533 return MF.getFunction().hasMinSize();
2534}
2535
2536std::optional<outliner::OutlinedFunction>
2538 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2539
2540 // First we need to filter out candidates where the X5 register (i.e. t0)
2541 // can't be used to set up the function call.
2542 auto CannotInsertCall = [](outliner::Candidate &C) {
2543 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2544 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2545 };
2546
2547 llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
2548
2549 // If the sequence doesn't have enough candidates left, then we're done.
2550 if (RepeatedSequenceLocs.size() < 2)
2551 return std::nullopt;
2552
2553 unsigned SequenceSize = 0;
2554
2555 for (auto &MI : RepeatedSequenceLocs[0])
2556 SequenceSize += getInstSizeInBytes(MI);
2557
2558 // call t0, function = 8 bytes.
2559 unsigned CallOverhead = 8;
2560 for (auto &C : RepeatedSequenceLocs)
2561 C.setCallInfo(MachineOutlinerDefault, CallOverhead);
2562
2563 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2564 unsigned FrameOverhead = 4;
2565 if (RepeatedSequenceLocs[0]
2566 .getMF()
2567 ->getSubtarget<RISCVSubtarget>()
2568 .hasStdExtCOrZca())
2569 FrameOverhead = 2;
2570
2571 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2572 FrameOverhead, MachineOutlinerDefault);
2573}
2574
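// [Annotation, not part of the upstream source] Cost model used above: every
// outlined call site is priced at 8 bytes (the call through t0 expands to
// auipc+jalr), and the outlined body pays a 4-byte frame overhead for the
// trailing jr t0 (2 bytes when C/Zca is available), so outlining only pays off
// once the shared sequence is long enough, or repeated often enough, to
// amortize those costs.
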
2577 unsigned Flags) const {
2578 MachineInstr &MI = *MBBI;
2579 MachineBasicBlock *MBB = MI.getParent();
2580 const TargetRegisterInfo *TRI =
2582 const auto &F = MI.getMF()->getFunction();
2583
2584 // We can manually strip out CFI instructions later.
2585 if (MI.isCFIInstruction())
2586 // If the current function has exception handling code, we can't outline and
2587 // strip these CFI instructions since that may break the .eh_frame section
2588 // needed for unwinding.
2589 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2591
2592 // We need support for tail calls to outlined functions before return
2593 // statements can be allowed.
2594 if (MI.isReturn())
2596
2597 // Don't allow modifying the X5 register which we use for return addresses for
2598 // these outlined functions.
2599 if (MI.modifiesRegister(RISCV::X5, TRI) ||
2600 MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2602
2603 // Make sure the operands don't reference something unsafe.
2604 for (const auto &MO : MI.operands()) {
2605
2606 // pcrel-hi and pcrel-lo can't be put in separate sections; filter that out
2607 // if at all possible.
2608 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
2609 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
2610 F.hasSection()))
2612 }
2613
2615}
2616
2619 const outliner::OutlinedFunction &OF) const {
2620
2621 // Strip out any CFI instructions
2622 bool Changed = true;
2623 while (Changed) {
2624 Changed = false;
2625 auto I = MBB.begin();
2626 auto E = MBB.end();
2627 for (; I != E; ++I) {
2628 if (I->isCFIInstruction()) {
2629 I->removeFromParent();
2630 Changed = true;
2631 break;
2632 }
2633 }
2634 }
2635
2636 MBB.addLiveIn(RISCV::X5);
2637
2638 // Add in a return instruction to the end of the outlined frame.
2639 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
2640 .addReg(RISCV::X0, RegState::Define)
2641 .addReg(RISCV::X5)
2642 .addImm(0));
2643}
2644
2648
2649 // Add in a call instruction to the outlined function at the given location.
2650 It = MBB.insert(It,
2651 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
2652 .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
2654 return It;
2655}
2656
2657std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
2658 Register Reg) const {
2659 // TODO: Handle cases where Reg is a super- or sub-register of the
2660 // destination register.
2661 const MachineOperand &Op0 = MI.getOperand(0);
2662 if (!Op0.isReg() || Reg != Op0.getReg())
2663 return std::nullopt;
2664
2665 // Don't consider ADDIW as a candidate because the caller may not be aware
2666 // of its sign extension behaviour.
2667 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
2668 MI.getOperand(2).isImm())
2669 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};
2670
2671 return std::nullopt;
2672}
2673
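// [Annotation, not part of the upstream source] isAddImmediate() above only
// recognizes plain ADDI; querying it for a0 on "addi a0, sp, 16" yields
// {sp, 16}. ADDIW is deliberately skipped because its result is sign-extended
// from 32 bits, which callers of this hook may not expect.
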
2674// MIR printer helper function to annotate Operands with a comment.
2676 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
2677 const TargetRegisterInfo *TRI) const {
2678 // Print a generic comment for this operand if there is one.
2679 std::string GenericComment =
2681 if (!GenericComment.empty())
2682 return GenericComment;
2683
2684 // If not, we must have an immediate operand.
2685 if (!Op.isImm())
2686 return std::string();
2687
2688 std::string Comment;
2689 raw_string_ostream OS(Comment);
2690
2691 uint64_t TSFlags = MI.getDesc().TSFlags;
2692
2693 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
2694 // operand of vector codegen pseudos.
2695 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||
2696 MI.getOpcode() == RISCV::PseudoVSETVLI ||
2697 MI.getOpcode() == RISCV::PseudoVSETIVLI ||
2698 MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&
2699 OpIdx == 2) {
2700 unsigned Imm = MI.getOperand(OpIdx).getImm();
2702 } else if (RISCVII::hasSEWOp(TSFlags) &&
2703 OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) {
2704 unsigned Log2SEW = MI.getOperand(OpIdx).getImm();
2705 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2706 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
2707 OS << "e" << SEW;
2708 } else if (RISCVII::hasVecPolicyOp(TSFlags) &&
2709 OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {
2710 unsigned Policy = MI.getOperand(OpIdx).getImm();
2712 "Invalid Policy Value");
2713 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
2714 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
2715 }
2716
2717 OS.flush();
2718 return Comment;
2719}
2720
2721// clang-format off
2722#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
2723 RISCV::PseudoV##OP##_##TYPE##_##LMUL
2724
2725#define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \
2726 CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
2727 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
2728 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
2729 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
2730
2731#define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \
2732 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
2733 case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)
2734
2735#define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \
2736 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
2737 case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)
2738
2739#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
2740 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
2741 case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)
2742
2743// VFMA instructions are SEW specific.
2744#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
2745 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
2746
2747#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
2748 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
2749 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
2750 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
2751 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
2752
2753#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
2754 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
2755 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
2756
2757#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
2758 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
2759 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
2760
2761#define CASE_VFMA_OPCODE_VV(OP) \
2762 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
2763 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
2764 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
2765
2766#define CASE_VFMA_SPLATS(OP) \
2767 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
2768 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
2769 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
2770// clang-format on
2771
2773 unsigned &SrcOpIdx1,
2774 unsigned &SrcOpIdx2) const {
2775 const MCInstrDesc &Desc = MI.getDesc();
2776 if (!Desc.isCommutable())
2777 return false;
2778
2779 switch (MI.getOpcode()) {
2780 case RISCV::TH_MVEQZ:
2781 case RISCV::TH_MVNEZ:
2782 // We can't commute operands if operand 2 (i.e., rs1 in
2783 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
2784 // not valid as the in/out-operand 1).
2785 if (MI.getOperand(2).getReg() == RISCV::X0)
2786 return false;
2787 // Operands 1 and 2 are commutable, if we switch the opcode.
2788 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
2789 case RISCV::TH_MULA:
2790 case RISCV::TH_MULAW:
2791 case RISCV::TH_MULAH:
2792 case RISCV::TH_MULS:
2793 case RISCV::TH_MULSW:
2794 case RISCV::TH_MULSH:
2795 // Operands 2 and 3 are commutable.
2796 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
2797 case RISCV::PseudoCCMOVGPRNoX0:
2798 case RISCV::PseudoCCMOVGPR:
2799 // Operands 4 and 5 are commutable.
2800 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
2801 case CASE_VFMA_SPLATS(FMADD):
2802 case CASE_VFMA_SPLATS(FMSUB):
2803 case CASE_VFMA_SPLATS(FMACC):
2804 case CASE_VFMA_SPLATS(FMSAC):
2807 case CASE_VFMA_SPLATS(FNMACC):
2808 case CASE_VFMA_SPLATS(FNMSAC):
2809 case CASE_VFMA_OPCODE_VV(FMACC):
2810 case CASE_VFMA_OPCODE_VV(FMSAC):
2811 case CASE_VFMA_OPCODE_VV(FNMACC):
2812 case CASE_VFMA_OPCODE_VV(FNMSAC):
2813 case CASE_VMA_OPCODE_LMULS(MADD, VX):
2814 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
2815 case CASE_VMA_OPCODE_LMULS(MACC, VX):
2816 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
2817 case CASE_VMA_OPCODE_LMULS(MACC, VV):
2818 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
2819 // If the tail policy is undisturbed we can't commute.
2820 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2821 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2822 return false;
2823
2824 // For these instructions we can only swap operand 1 and operand 3 by
2825 // changing the opcode.
2826 unsigned CommutableOpIdx1 = 1;
2827 unsigned CommutableOpIdx2 = 3;
2828 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2829 CommutableOpIdx2))
2830 return false;
2831 return true;
2832 }
2833 case CASE_VFMA_OPCODE_VV(FMADD):
2837 case CASE_VMA_OPCODE_LMULS(MADD, VV):
2838 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
2839 // If the tail policy is undisturbed we can't commute.
2840 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2841 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2842 return false;
2843
2844 // For these instructions we have more freedom. We can commute with the
2845 // other multiplicand or with the addend/subtrahend/minuend.
2846
2847 // Any fixed operand must be from source 1, 2 or 3.
2848 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
2849 return false;
2850 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
2851 return false;
2852
2853 // If both ops are fixed, one must be the tied source.
2854 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2855 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
2856 return false;
2857
2858 // Look for two different register operands assumed to be commutable
2859 // regardless of the FMA opcode. The FMA opcode is adjusted later if
2860 // needed.
2861 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2862 SrcOpIdx2 == CommuteAnyOperandIndex) {
2863 // At least one of operands to be commuted is not specified and
2864 // this method is free to choose appropriate commutable operands.
2865 unsigned CommutableOpIdx1 = SrcOpIdx1;
2866 if (SrcOpIdx1 == SrcOpIdx2) {
2867 // Both of operands are not fixed. Set one of commutable
2868 // operands to the tied source.
2869 CommutableOpIdx1 = 1;
2870 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
2871 // Only one of the operands is not fixed.
2872 CommutableOpIdx1 = SrcOpIdx2;
2873 }
2874
2875 // CommutableOpIdx1 is well defined now. Let's choose another commutable
2876 // operand and assign its index to CommutableOpIdx2.
2877 unsigned CommutableOpIdx2;
2878 if (CommutableOpIdx1 != 1) {
2879 // If we haven't already used the tied source, we must use it now.
2880 CommutableOpIdx2 = 1;
2881 } else {
2882 Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
2883
2884 // The commuted operands should have different registers.
2885 // Otherwise, the commute transformation does not change anything and
2886 // is useless. We use this as a hint to make our decision.
2887 if (Op1Reg != MI.getOperand(2).getReg())
2888 CommutableOpIdx2 = 2;
2889 else
2890 CommutableOpIdx2 = 3;
2891 }
2892
2893 // Assign the found pair of commutable indices to SrcOpIdx1 and
2894 // SrcOpIdx2 to return those values.
2895 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2896 CommutableOpIdx2))
2897 return false;
2898 }
2899
2900 return true;
2901 }
2902 }
2903
2904 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2905}
2906
2907// clang-format off
2908#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
2909 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
2910 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
2911 break;
2912
2913#define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
2914 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
2915 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
2916 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
2917 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
2918
2919#define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
2920 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
2921 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
2922
2923#define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
2924 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
2925 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
2926
2927#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
2928 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
2929 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
2930
2931#define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
2932 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
2933 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
2934 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
2935
2936// VFMA depends on SEW.
2937#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
2938 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
2939 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
2940 break;
2941
2942#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
2943 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
2944 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
2945 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
2946 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
2947
2948#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
2949 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
2950 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
2951
2952#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
2953 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E16) \
2954 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
2955 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
2956
2957#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
2958 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
2959 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
2960
2961#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \
2962 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \
2963 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)
2964
2965#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
2966 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
2967 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
2968 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
2969
2971 bool NewMI,
2972 unsigned OpIdx1,
2973 unsigned OpIdx2) const {
2974 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
2975 if (NewMI)
2976 return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
2977 return MI;
2978 };
2979
2980 switch (MI.getOpcode()) {
2981 case RISCV::TH_MVEQZ:
2982 case RISCV::TH_MVNEZ: {
2983 auto &WorkingMI = cloneIfNew(MI);
2984 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
2985 : RISCV::TH_MVEQZ));
2986 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
2987 OpIdx2);
2988 }
2989 case RISCV::PseudoCCMOVGPRNoX0:
2990 case RISCV::PseudoCCMOVGPR: {
2991 // CCMOV can be commuted by inverting the condition.
2992 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
2994 auto &WorkingMI = cloneIfNew(MI);
2995 WorkingMI.getOperand(3).setImm(CC);
2996 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
2997 OpIdx1, OpIdx2);
2998 }
2999 case CASE_VFMA_SPLATS(FMACC):
3000 case CASE_VFMA_SPLATS(FMADD):
3001 case CASE_VFMA_SPLATS(FMSAC):
3002 case CASE_VFMA_SPLATS(FMSUB):
3003 case CASE_VFMA_SPLATS(FNMACC):
3005 case CASE_VFMA_SPLATS(FNMSAC):
3007 case CASE_VFMA_OPCODE_VV(FMACC):
3008 case CASE_VFMA_OPCODE_VV(FMSAC):
3009 case CASE_VFMA_OPCODE_VV(FNMACC):
3010 case CASE_VFMA_OPCODE_VV(FNMSAC):
3011 case CASE_VMA_OPCODE_LMULS(MADD, VX):
3012 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
3013 case CASE_VMA_OPCODE_LMULS(MACC, VX):
3014 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
3015 case CASE_VMA_OPCODE_LMULS(MACC, VV):
3016 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
3017 // It only makes sense to toggle these between clobbering the
3018 // addend/subtrahend/minuend and clobbering one of the multiplicands.
3019 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3020 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
3021 unsigned Opc;
3022 switch (MI.getOpcode()) {
3023 default:
3024 llvm_unreachable("Unexpected opcode");
3025 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
3026 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
3033 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
3037 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
3038 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
3039 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
3040 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
3041 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
3042 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
3043 }
3044
3045 auto &WorkingMI = cloneIfNew(MI);
3046 WorkingMI.setDesc(get(Opc));
3047 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3048 OpIdx1, OpIdx2);
3049 }
3050 case CASE_VFMA_OPCODE_VV(FMADD):
3054 case CASE_VMA_OPCODE_LMULS(MADD, VV):
3055 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
3056 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3057 // If one of the operands is the addend we need to change the opcode.
3058 // Otherwise we're just swapping 2 of the multiplicands.
3059 if (OpIdx1 == 3 || OpIdx2 == 3) {
3060 unsigned Opc;
3061 switch (MI.getOpcode()) {
3062 default:
3063 llvm_unreachable("Unexpected opcode");
3064 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
3068 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
3069 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
3070 }
3071
3072 auto &WorkingMI = cloneIfNew(MI);
3073 WorkingMI.setDesc(get(Opc));
3074 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3075 OpIdx1, OpIdx2);
3076 }
3077 // Let the default code handle it.
3078 break;
3079 }
3080 }
3081
3082 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
3083}
3084
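// [Annotation, not part of the upstream source] Example of the opcode toggling
// above: vfmacc.vv vd, vs1, vs2 computes vd = vs1*vs2 + vd (vd is the addend),
// while vfmadd.vv vd, vs1, vs2 computes vd = vd*vs1 + vs2 (vs2 is the addend).
// Commuting the tied destination (operand 1) with operand 3 therefore changes
// which value is clobbered, and the pseudo is switched between the *MACC and
// *MADD forms so the arithmetic stays the same.
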
3085#undef CASE_VMA_OPCODE_COMMON
3086#undef CASE_VMA_OPCODE_LMULS_M1
3087#undef CASE_VMA_OPCODE_LMULS_MF2
3088#undef CASE_VMA_OPCODE_LMULS_MF4
3089#undef CASE_VMA_OPCODE_LMULS
3090#undef CASE_VFMA_OPCODE_COMMON
3091#undef CASE_VFMA_OPCODE_LMULS_M1
3092#undef CASE_VFMA_OPCODE_LMULS_MF2
3093#undef CASE_VFMA_OPCODE_LMULS_MF4
3094#undef CASE_VFMA_OPCODE_VV
3095#undef CASE_VFMA_SPLATS
3096
3097// clang-format off
3098#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
3099 RISCV::PseudoV##OP##_##LMUL##_TIED
3100
3101#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \
3102 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
3103 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
3104 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
3105 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
3106 case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
3107
3108#define CASE_WIDEOP_OPCODE_LMULS(OP) \
3109 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
3110 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
3111
3112#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
3113 case RISCV::PseudoV##OP##_##LMUL##_TIED: \
3114 NewOpc = RISCV::PseudoV##OP##_##LMUL; \
3115 break;
3116
3117#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3118 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
3119 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
3120 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
3121 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
3122 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
3123
3124#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3125 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
3126 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3127
3128// FP widening ops may be SEW aware. Create SEW-aware cases for them.
3129#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
3130 RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED
3131
3132#define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \
3133 CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
3134 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
3135 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
3136 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
3137 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
3138 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
3139 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
3140 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
3141 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32) \
3142
3143#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
3144 case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
3145 NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
3146 break;
3147
3148#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3149 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
3150 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
3151 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
3152 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
3153 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
3154 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
3155 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
3156 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
3157 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \
3158
3159#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3160 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3161// clang-format on
3162
3164 LiveVariables *LV,
3165 LiveIntervals *LIS) const {
3167 switch (MI.getOpcode()) {
3168 default:
3169 return nullptr;
3170 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
3171 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
3172 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3173 MI.getNumExplicitOperands() == 7 &&
3174 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
3175 // If the tail policy is undisturbed we can't convert.
3176 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
3177 1) == 0)
3178 return nullptr;
3179 // clang-format off
3180 unsigned NewOpc;
3181 switch (MI.getOpcode()) {
3182 default:
3183 llvm_unreachable("Unexpected opcode");
3186 }
3187 // clang-format on
3188
3189 MachineBasicBlock &MBB = *MI.getParent();
3190 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3191 .add(MI.getOperand(0))
3192 .addReg(MI.getOperand(0).getReg(), RegState::Undef)
3193 .add(MI.getOperand(1))
3194 .add(MI.getOperand(2))
3195 .add(MI.getOperand(3))
3196 .add(MI.getOperand(4))
3197 .add(MI.getOperand(5))
3198 .add(MI.getOperand(6));
3199 break;
3200 }
3201 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
3202 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
3203 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
3204 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
3205 // If the tail policy is undisturbed we can't convert.
3206 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3207 MI.getNumExplicitOperands() == 6);
3208 if ((MI.getOperand(5).getImm() & 1) == 0)
3209 return nullptr;
3210
3211 // clang-format off
3212 unsigned NewOpc;
3213 switch (MI.getOpcode()) {
3214 default:
3215 llvm_unreachable("Unexpected opcode");
3220 }
3221 // clang-format on
3222
3223 MachineBasicBlock &MBB = *MI.getParent();
3224 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3225 .add(MI.getOperand(0))
3226 .addReg(MI.getOperand(0).getReg(), RegState::Undef)
3227 .add(MI.getOperand(1))
3228 .add(MI.getOperand(2))
3229 .add(MI.getOperand(3))
3230 .add(MI.getOperand(4))
3231 .add(MI.getOperand(5));
3232 break;
3233 }
3234 }
3235 MIB.copyImplicitOps(MI);
3236
3237 if (LV) {
3238 unsigned NumOps = MI.getNumOperands();
3239 for (unsigned I = 1; I < NumOps; ++I) {
3240 MachineOperand &Op = MI.getOperand(I);
3241 if (Op.isReg() && Op.isKill())
3242 LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
3243 }
3244 }
3245
3246 if (LIS) {
3248
3249 if (MI.getOperand(0).isEarlyClobber()) {
3250 // Use operand 1 was tied to early-clobber def operand 0, so its live
3251 // interval could have ended at an early-clobber slot. Now that they are no
3252 // longer tied, we need to update it to the normal register slot.
3253 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
3255 if (S->end == Idx.getRegSlot(true))
3256 S->end = Idx.getRegSlot();
3257 }
3258 }
3259
3260 return MIB;
3261}
3262
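// [Annotation, not part of the upstream source] convertToThreeAddress() above
// rewrites the _TIED widening pseudos, whose destination doubles as the wide
// source operand, into their untied forms with an explicit undef passthru,
// e.g. PseudoVWADD_WV_M1_TIED becomes PseudoVWADD_WV_M1. This is only legal
// when the tail policy is agnostic, which is why the policy operand is checked
// first.
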
3263#undef CASE_WIDEOP_OPCODE_COMMON
3264#undef CASE_WIDEOP_OPCODE_LMULS_MF4
3265#undef CASE_WIDEOP_OPCODE_LMULS
3266#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
3267#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3268#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
3269#undef CASE_FP_WIDEOP_OPCODE_COMMON
3270#undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
3271#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
3272#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3273#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
3274
3277 Register DestReg, uint32_t Amount,
3278 MachineInstr::MIFlag Flag) const {
3280 if (llvm::has_single_bit<uint32_t>(Amount)) {
3281 uint32_t ShiftAmount = Log2_32(Amount);
3282 if (ShiftAmount == 0)
3283 return;
3284 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3285 .addReg(DestReg, RegState::Kill)
3286 .addImm(ShiftAmount)
3287 .setMIFlag(Flag);
3288 } else if (STI.hasStdExtZba() &&
3289 ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
3290 (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
3291 (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
3292 // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
3293 unsigned Opc;
3294 uint32_t ShiftAmount;
3295 if (Amount % 9 == 0) {
3296 Opc = RISCV::SH3ADD;
3297 ShiftAmount = Log2_64(Amount / 9);
3298 } else if (Amount % 5 == 0) {
3299 Opc = RISCV::SH2ADD;
3300 ShiftAmount = Log2_64(Amount / 5);
3301 } else if (Amount % 3 == 0) {
3302 Opc = RISCV::SH1ADD;
3303 ShiftAmount = Log2_64(Amount / 3);
3304 } else {
3305 llvm_unreachable("implied by if-clause");
3306 }
3307 if (ShiftAmount)
3308 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3309 .addReg(DestReg, RegState::Kill)
3310 .addImm(ShiftAmount)
3311 .setMIFlag(Flag);
3312 BuildMI(MBB, II, DL, get(Opc), DestReg)
3313 .addReg(DestReg, RegState::Kill)
3314 .addReg(DestReg)
3315 .setMIFlag(Flag);
3316 } else if (llvm::has_single_bit<uint32_t>(Amount - 1)) {
3317 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3318 uint32_t ShiftAmount = Log2_32(Amount - 1);
3319 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3320 .addReg(DestReg)
3321 .addImm(ShiftAmount)
3322 .setMIFlag(Flag);
3323 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3324 .addReg(ScaledRegister, RegState::Kill)
3325 .addReg(DestReg, RegState::Kill)
3326 .setMIFlag(Flag);
3327 } else if (llvm::has_single_bit<uint32_t>(Amount + 1)) {
3328 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3329 uint32_t ShiftAmount = Log2_32(Amount + 1);
3330 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3331 .addReg(DestReg)
3332 .addImm(ShiftAmount)
3333 .setMIFlag(Flag);
3334 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
3335 .addReg(ScaledRegister, RegState::Kill)
3336 .addReg(DestReg, RegState::Kill)
3337 .setMIFlag(Flag);
3338 } else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) {
3339 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3340 movImm(MBB, II, DL, N, Amount, Flag);
3341 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
3342 .addReg(DestReg, RegState::Kill)
3344 .setMIFlag(Flag);
3345 } else {
3346 Register Acc;
3347 uint32_t PrevShiftAmount = 0;
3348 for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
3349 if (Amount & (1U << ShiftAmount)) {
3350 if (ShiftAmount)
3351 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3352 .addReg(DestReg, RegState::Kill)
3353 .addImm(ShiftAmount - PrevShiftAmount)
3354 .setMIFlag(Flag);
3355 if (Amount >> (ShiftAmount + 1)) {
3356 // If we don't have an accumulator yet, create it and copy DestReg.
3357 if (!Acc) {
3358 Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3359 BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
3360 .addReg(DestReg)
3361 .setMIFlag(Flag);
3362 } else {
3363 BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
3364 .addReg(Acc, RegState::Kill)
3365 .addReg(DestReg)
3366 .setMIFlag(Flag);
3367 }
3368 }
3369 PrevShiftAmount = ShiftAmount;
3370 }
3371 }
3372 assert(Acc && "Expected valid accumulator");
3373 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3374 .addReg(DestReg, RegState::Kill)
3375 .addReg(Acc, RegState::Kill)
3376 .setMIFlag(Flag);
3377 }
3378}
3379
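// [Annotation, not part of the upstream source] Examples of the strategies
// chosen by mulImm() above (DestReg initially holds the multiplicand):
//   Amount = 16              -> slli dst, dst, 4
//   Amount = 40 (5 * 8, Zba) -> slli dst, dst, 3 ; sh2add dst, dst, dst
//   Amount = 9  (no Zba)     -> slli tmp, dst, 3 ; add dst, tmp, dst
//   Amount = 7               -> slli tmp, dst, 3 ; sub dst, tmp, dst
// Otherwise a MUL against a materialized constant (M/Zmmul) or a
// shift-and-accumulate loop is emitted.
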
3382 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
3383 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
3384 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
3385 return ArrayRef(TargetFlags);
3386}
3387
3388// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
3390 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
3391 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
3392}
3393
3394// Returns true if this is the zext.w pattern, adduw rd, rs1, x0.
3396 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
3397 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
3398}
3399
3400// Returns true if this is the zext.b pattern, andi rd, rs1, 255.
3402 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
3403 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
3404}
3405
3406static bool isRVVWholeLoadStore(unsigned Opcode) {
3407 switch (Opcode) {
3408 default:
3409 return false;
3410 case RISCV::VS1R_V:
3411 case RISCV::VS2R_V:
3412 case RISCV::VS4R_V:
3413 case RISCV::VS8R_V:
3414 case RISCV::VL1RE8_V:
3415 case RISCV::VL2RE8_V:
3416 case RISCV::VL4RE8_V:
3417 case RISCV::VL8RE8_V:
3418 case RISCV::VL1RE16_V:
3419 case RISCV::VL2RE16_V:
3420 case RISCV::VL4RE16_V:
3421 case RISCV::VL8RE16_V:
3422 case RISCV::VL1RE32_V:
3423 case RISCV::VL2RE32_V:
3424 case RISCV::VL4RE32_V:
3425 case RISCV::VL8RE32_V:
3426 case RISCV::VL1RE64_V:
3427 case RISCV::VL2RE64_V:
3428 case RISCV::VL4RE64_V:
3429 case RISCV::VL8RE64_V:
3430 return true;
3431 }
3432}
3433
3435 // RVV lacks any support for immediate addressing for stack addresses, so be
3436 // conservative.
3437 unsigned Opcode = MI.getOpcode();
3438 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
3439 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
3440 return false;
3441 return true;
3442}
3443
3444std::optional<std::pair<unsigned, unsigned>>
3446 switch (Opcode) {
3447 default:
3448 return std::nullopt;
3449 case RISCV::PseudoVSPILL2_M1:
3450 case RISCV::PseudoVRELOAD2_M1:
3451 return std::make_pair(2u, 1u);
3452 case RISCV::PseudoVSPILL2_M2:
3453 case RISCV::PseudoVRELOAD2_M2:
3454 return std::make_pair(2u, 2u);
3455 case RISCV::PseudoVSPILL2_M4:
3456 case RISCV::PseudoVRELOAD2_M4:
3457 return std::make_pair(2u, 4u);
3458 case RISCV::PseudoVSPILL3_M1:
3459 case RISCV::PseudoVRELOAD3_M1:
3460 return std::make_pair(3u, 1u);
3461 case RISCV::PseudoVSPILL3_M2:
3462 case RISCV::PseudoVRELOAD3_M2:
3463 return std::make_pair(3u, 2u);
3464 case RISCV::PseudoVSPILL4_M1:
3465 case RISCV::PseudoVRELOAD4_M1:
3466 return std::make_pair(4u, 1u);
3467 case RISCV::PseudoVSPILL4_M2:
3468 case RISCV::PseudoVRELOAD4_M2:
3469 return std::make_pair(4u, 2u);
3470 case RISCV::PseudoVSPILL5_M1:
3471 case RISCV::PseudoVRELOAD5_M1:
3472 return std::make_pair(5u, 1u);
3473 case RISCV::PseudoVSPILL6_M1:
3474 case RISCV::PseudoVRELOAD6_M1:
3475 return std::make_pair(6u, 1u);
3476 case RISCV::PseudoVSPILL7_M1:
3477 case RISCV::PseudoVRELOAD7_M1:
3478 return std::make_pair(7u, 1u);
3479 case RISCV::PseudoVSPILL8_M1:
3480 case RISCV::PseudoVRELOAD8_M1:
3481 return std::make_pair(8u, 1u);
3482 }
3483}
3484
3486 return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) &&
3487 !MI.isInlineAsm();
3488}
3489
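// [Annotation, not part of the upstream source] isFaultFirstLoad() above
// matches the vector fault-only-first load pseudos (vle<EEW>ff.v): they have
// two explicit definitions, the loaded vector and the GPR that receives the
// new vector length, and they also update the VL register, which is what the
// check encodes.
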
3490bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
3491 int16_t MI1FrmOpIdx =
3492 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
3493 int16_t MI2FrmOpIdx =
3494 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
3495 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
3496 return false;
3497 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
3498 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
3499 return FrmOp1.getImm() == FrmOp2.getImm();
3500}
3501
3502std::optional<unsigned>
3504 // TODO: Handle Zvbb instructions
3505 switch (Opcode) {
3506 default:
3507 return std::nullopt;
3508
3509 // 11.6. Vector Single-Width Shift Instructions
3510 case RISCV::VSLL_VX:
3511 case RISCV::VSRL_VX:
3512 case RISCV::VSRA_VX:
3513 // 12.4. Vector Single-Width Scaling Shift Instructions
3514 case RISCV::VSSRL_VX:
3515 case RISCV::VSSRA_VX:
3516 // Only the low lg2(SEW) bits of the shift-amount value are used.
3517 return Log2SEW;
3518
3519 // 11.7 Vector Narrowing Integer Right Shift Instructions
3520 case RISCV::VNSRL_WX:
3521 case RISCV::VNSRA_WX:
3522 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
3523 case RISCV::VNCLIPU_WX:
3524 case RISCV::VNCLIP_WX:
3525 // Only the low lg2(2*SEW) bits of the shift-amount value are used.
3526 return Log2SEW + 1;
3527
3528 // 11.1. Vector Single-Width Integer Add and Subtract
3529 case RISCV::VADD_VX:
3530 case RISCV::VSUB_VX:
3531 case RISCV::VRSUB_VX:
3532 // 11.2. Vector Widening Integer Add/Subtract
3533 case RISCV::VWADDU_VX:
3534 case RISCV::VWSUBU_VX:
3535 case RISCV::VWADD_VX:
3536 case RISCV::VWSUB_VX:
3537 case RISCV::VWADDU_WX:
3538 case RISCV::VWSUBU_WX:
3539 case RISCV::VWADD_WX:
3540 case RISCV::VWSUB_WX:
3541 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
3542 case RISCV::VADC_VXM:
3543 case RISCV::VADC_VIM:
3544 case RISCV::VMADC_VXM:
3545 case RISCV::VMADC_VIM:
3546 case RISCV::VMADC_VX:
3547 case RISCV::VSBC_VXM:
3548 case RISCV::VMSBC_VXM:
3549 case RISCV::VMSBC_VX:
3550 // 11.5 Vector Bitwise Logical Instructions
3551 case RISCV::VAND_VX:
3552 case RISCV::VOR_VX:
3553 case RISCV::VXOR_VX:
3554 // 11.8. Vector Integer Compare Instructions
3555 case RISCV::VMSEQ_VX:
3556 case RISCV::VMSNE_VX:
3557 case RISCV::VMSLTU_VX:
3558 case RISCV::VMSLT_VX:
3559 case RISCV::VMSLEU_VX:
3560 case RISCV::VMSLE_VX:
3561 case RISCV::VMSGTU_VX:
3562 case RISCV::VMSGT_VX:
3563 // 11.9. Vector Integer Min/Max Instructions
3564 case RISCV::VMINU_VX:
3565 case RISCV::VMIN_VX:
3566 case RISCV::VMAXU_VX:
3567 case RISCV::VMAX_VX:
3568 // 11.10. Vector Single-Width Integer Multiply Instructions
3569 case RISCV::VMUL_VX:
3570 case RISCV::VMULH_VX:
3571 case RISCV::VMULHU_VX:
3572 case RISCV::VMULHSU_VX:
3573 // 11.11. Vector Integer Divide Instructions
3574 case RISCV::VDIVU_VX:
3575 case RISCV::VDIV_VX:
3576 case RISCV::VREMU_VX:
3577 case RISCV::VREM_VX:
3578 // 11.12. Vector Widening Integer Multiply Instructions
3579 case RISCV::VWMUL_VX:
3580 case RISCV::VWMULU_VX:
3581 case RISCV::VWMULSU_VX:
3582 // 11.13. Vector Single-Width Integer Multiply-Add Instructions
3583 case RISCV::VMACC_VX:
3584 case RISCV::VNMSAC_VX:
3585 case RISCV::VMADD_VX:
3586 case RISCV::VNMSUB_VX:
3587 // 11.14. Vector Widening Integer Multiply-Add Instructions
3588 case RISCV::VWMACCU_VX:
3589 case RISCV::VWMACC_VX:
3590 case RISCV::VWMACCSU_VX:
3591 case RISCV::VWMACCUS_VX:
3592 // 11.15. Vector Integer Merge Instructions
3593 case RISCV::VMERGE_VXM:
3594 // 11.16. Vector Integer Move Instructions
3595 case RISCV::VMV_V_X:
3596 // 12.1. Vector Single-Width Saturating Add and Subtract
3597 case RISCV::VSADDU_VX:
3598 case RISCV::VSADD_VX:
3599 case RISCV::VSSUBU_VX:
3600 case RISCV::VSSUB_VX:
3601 // 12.2. Vector Single-Width Averaging Add and Subtract
3602 case RISCV::VAADDU_VX:
3603 case RISCV::VAADD_VX:
3604 case RISCV::VASUBU_VX:
3605 case RISCV::VASUB_VX:
3606 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
3607 case RISCV::VSMUL_VX:
3608 // 16.1. Integer Scalar Move Instructions
3609 case RISCV::VMV_S_X:
3610 return 1U << Log2SEW;
3611 }
3612}
3613
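// [Annotation, not part of the upstream source] Examples for the helper above:
// for VSLL_VX at SEW=32 only the low 5 bits of the scalar shift amount matter
// (log2(SEW)); for VNSRL_WX at SEW=32 the low 6 bits (log2(2*SEW)); and for
// arithmetic ops such as VADD_VX exactly SEW bits of the scalar are demanded.
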
3614unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
3616 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
3617 if (!RVV)
3618 return 0;
3619 return RVV->BaseInstr;
3620}
static unsigned getSHXADDShiftAmount(unsigned Opc)
#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
static unsigned getSize(unsigned Kind)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
bool isBigEndian() const
Definition: DataLayout.h:239
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
Definition: LiveInterval.h:408
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
bool hasValue() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
TypeSize getValue() const
MCInstBuilder & addReg(unsigned Reg)
Add a new register operand.
Definition: MCInstBuilder.h:37
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
bool isConditionalBranch() const
Return true if this is a branch which may fall through to the next instruction or may transfer contro...
Definition: MCInstrDesc.h:317
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Set of metadata that should be preserved when using BuildMI().
unsigned pred_size() const
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setStackID(int ObjectIdx, uint8_t ID)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:379
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:543
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:792
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:777
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:759
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:475
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:374
void clearKillInfo()
Clears kill flags on all operands.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, MachineInstr::MIFlag Flag=MachineInstr::NoFlags, bool DstRenamable=false, bool DstIsDead=false) const
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
void mulImm(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, const DebugLoc &DL, Register DestReg, uint32_t Amt, MachineInstr::MIFlag Flag) const
Generate code to multiply the value in DestReg by Amt - handles all the common optimizations for this...
const MCInstrDesc & getBrCond(RISCVCC::CondCode CC) const
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &dl, int *BytesAdded=nullptr) const override
bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const override
RISCVInstrInfo(RISCVSubtarget &STI)
void copyPhysRegVector(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RegClass) const
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
MachineInstr * optimizeSelect(MachineInstr &MI, SmallPtrSetImpl< MachineInstr * > &SeenMIs, bool) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc) const override
const RISCVSubtarget & STI
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
std::optional< unsigned > getInverseOpcode(unsigned Opcode) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineTraceStrategy getMachineCombinerTraceStrategy() const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
virtual outliner::InstrType getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI, unsigned Flags) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MCInst getNop() const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
CombinerObjective getCombinerObjective(unsigned Pattern) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
bool optimizeCondBranch(MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
bool analyzeSelect(const MachineInstr &MI, SmallVectorImpl< MachineOperand > &Cond, unsigned &TrueOp, unsigned &FalseOp, bool &Optimizable) const override
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool IsKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
bool hasStdExtCOrZca() const
unsigned getXLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const
Optional target hook that returns true if MBB is safe to outline from, and returns any target-specifi...
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const
Return true when \P Inst has reassociable sibling.
virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const
const uint8_t TSFlags
Configurable target specific flags.
ArrayRef< MCPhysReg > getRegisters() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getOppositeBranchCondition(CondCode)
unsigned getBrCond(CondCode CC)
static unsigned getVecPolicyOpNum(const MCInstrDesc &Desc)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool isRVVWideningReduction(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool hasSEWOp(uint64_t TSFlags)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
@ OPERAND_UIMMLOG2XLEN_NONZERO
@ OPERAND_UIMM10_LSB00_NONZERO
@ OPERAND_SIMM10_LSB0000_NONZERO
static RISCVII::VLMUL getLMul(uint64_t TSFlags)
static unsigned getNF(uint64_t TSFlags)
static bool isTailAgnostic(unsigned VType)
static RISCVII::VLMUL getVLMUL(unsigned VType)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static bool isValidSEW(unsigned SEW)
void printVType(unsigned VType, raw_ostream &OS)
static unsigned getSEW(unsigned VType)
bool hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2)
std::optional< unsigned > getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW)
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
bool isSEXT_W(const MachineInstr &MI)
bool isFaultFirstLoad(const MachineInstr &MI)
std::optional< std::pair< unsigned, unsigned > > isRVVSpillForZvlsseg(unsigned Opcode)
bool isZEXT_B(const MachineInstr &MI)
bool isRVVSpill(const MachineInstr &MI)
bool isZEXT_W(const MachineInstr &MI)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
@ SHXADD_ADD_SLLI_OP2
@ FMADD_AX
@ FMADD_XA
@ SHXADD_ADD_SLLI_OP1
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2406
static const MachineMemOperand::Flags MONontemporalBit0
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
unsigned getDeadRegState(bool B)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:319
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:233
unsigned getRenamableRegState(bool B)
DWARFExpression::Operation Op
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
#define N
Description of the encoding of one expression Op.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
This represents a simple continuous liveness interval for a value.
Definition: LiveInterval.h:162
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.