LLVM 23.0.0git
RISCVVLOptimizer.cpp
Go to the documentation of this file.
1//===-------------- RISCVVLOptimizer.cpp - VL Optimizer -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass reduces the VL where possible at the MI level, before VSETVLI
10// instructions are inserted.
11//
12// The purpose of this optimization is to make the VL argument, for instructions
13// that have a VL argument, as small as possible.
14//
15// This is split into a sparse dataflow analysis where we determine what VL is
16// demanded by each instruction first, and then afterwards try to reduce the VL
17// of each instruction if it demands less than its VL operand.
18//
19// The analysis is explained in more detail in the 2025 EuroLLVM Developers'
20// Meeting talk "Accidental Dataflow Analysis: Extending the RISC-V VL
21// Optimizer", which is available on YouTube at
22// https://www.youtube.com/watch?v=Mfb5fRSdJAc
23//
24// The slides for the talk are available at
25// https://llvm.org/devmtg/2025-04/slides/technical_talk/lau_accidental_dataflow.pdf
26//
27//===---------------------------------------------------------------------===//
28
29#include "RISCV.h"
30#include "RISCVSubtarget.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "riscv-vl-optimizer"
39#define PASS_NAME "RISC-V VL Optimizer"
40
41namespace {
42
43/// Wrapper around MachineOperand that defaults to immediate 0.
44struct DemandedVL {
46 DemandedVL() : VL(MachineOperand::CreateImm(0)) {}
47 DemandedVL(MachineOperand VL) : VL(VL) {}
48 static DemandedVL vlmax() {
50 }
51 bool operator!=(const DemandedVL &Other) const {
52 return !VL.isIdenticalTo(Other.VL);
53 }
54
55 DemandedVL max(const DemandedVL &X) const {
56 if (RISCV::isVLKnownLE(VL, X.VL))
57 return X;
58 if (RISCV::isVLKnownLE(X.VL, VL))
59 return *this;
60 return DemandedVL::vlmax();
61 }
62};
63
64class RISCVVLOptimizer : public MachineFunctionPass {
66 const MachineDominatorTree *MDT;
67 const TargetInstrInfo *TII;
68
69public:
70 static char ID;
71
72 RISCVVLOptimizer() : MachineFunctionPass(ID) {}
73
74 bool runOnMachineFunction(MachineFunction &MF) override;
75
76 void getAnalysisUsage(AnalysisUsage &AU) const override {
77 AU.setPreservesCFG();
80 }
81
82 StringRef getPassName() const override { return PASS_NAME; }
83
84private:
85 DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
86 /// Returns true if the users of \p MI have compatible EEWs and SEWs.
87 bool checkUsers(const MachineInstr &MI) const;
88 bool tryReduceVL(MachineInstr &MI, MachineOperand VL) const;
89 bool isSupportedInstr(const MachineInstr &MI) const;
90 bool isCandidate(const MachineInstr &MI) const;
91 void transfer(const MachineInstr &MI);
92
93 /// For a given instruction, records what elements of it are demanded by
94 /// downstream users.
97
98 /// \returns all vector virtual registers that \p MI uses.
99 auto virtual_vec_uses(const MachineInstr &MI) const {
100 return make_filter_range(MI.uses(), [this](const MachineOperand &MO) {
101 return MO.isReg() && MO.getReg().isVirtual() &&
102 RISCVRegisterInfo::isRVVRegClass(MRI->getRegClass(MO.getReg()));
103 });
104 }
105};
106
107/// Represents the EMUL and EEW of a MachineOperand.
108struct OperandInfo {
109 // Represent as 1,2,4,8, ... and fractional indicator. This is because
110 // EMUL can take on values that don't map to RISCVVType::VLMUL values exactly.
111 // For example, a mask operand can have an EMUL less than MF8.
112 // If nullopt, then EMUL isn't used (i.e. only a single scalar is read).
113 std::optional<std::pair<unsigned, bool>> EMUL;
114
115 unsigned Log2EEW;
116
117 OperandInfo(RISCVVType::VLMUL EMUL, unsigned Log2EEW)
118 : EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {}
119
120 OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
121 : EMUL(EMUL), Log2EEW(Log2EEW) {}
122
123 OperandInfo(unsigned Log2EEW) : Log2EEW(Log2EEW) {}
124
125 OperandInfo() = delete;
126
127 /// Return true if the EMUL and EEW produced by \p Def is compatible with the
128 /// EMUL and EEW used by \p User.
129 static bool areCompatible(const OperandInfo &Def, const OperandInfo &User) {
130 if (Def.Log2EEW != User.Log2EEW)
131 return false;
132 if (User.EMUL && Def.EMUL != User.EMUL)
133 return false;
134 return true;
135 }
136
137 void print(raw_ostream &OS) const {
138 if (EMUL) {
139 OS << "EMUL: m";
140 if (EMUL->second)
141 OS << "f";
142 OS << EMUL->first;
143 } else
144 OS << "EMUL: none\n";
145 OS << ", EEW: " << (1 << Log2EEW);
146 }
147};
148
149} // end anonymous namespace
150
151char RISCVVLOptimizer::ID = 0;
152INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, PASS_NAME, false, false)
155
157 return new RISCVVLOptimizer();
158}
159
160[[maybe_unused]]
161static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
162 OI.print(OS);
163 return OS;
164}
165
166[[maybe_unused]]
168 const std::optional<OperandInfo> &OI) {
169 if (OI)
170 OI->print(OS);
171 else
172 OS << "nullopt";
173 return OS;
174}
175
176/// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and
177/// SEW are from the TSFlags of MI.
178static std::pair<unsigned, bool>
180 RISCVVType::VLMUL MIVLMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
181 auto [MILMUL, MILMULIsFractional] = RISCVVType::decodeVLMUL(MIVLMUL);
182 unsigned MILog2SEW =
183 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
184
185 // Mask instructions will have 0 as the SEW operand. But the LMUL of these
186 // instructions is calculated is as if the SEW operand was 3 (e8).
187 if (MILog2SEW == 0)
188 MILog2SEW = 3;
189
190 unsigned MISEW = 1 << MILog2SEW;
191
192 unsigned EEW = 1 << Log2EEW;
193 // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD
194 // to put fraction in simplest form.
195 unsigned Num = EEW, Denom = MISEW;
196 int GCD = MILMULIsFractional ? std::gcd(Num, Denom * MILMUL)
197 : std::gcd(Num * MILMUL, Denom);
198 Num = MILMULIsFractional ? Num / GCD : Num * MILMUL / GCD;
199 Denom = MILMULIsFractional ? Denom * MILMUL / GCD : Denom / GCD;
200 return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
201}
202
203/// Dest has EEW=SEW. Source EEW=SEW/Factor (i.e. F2 => EEW/2).
204/// SEW comes from TSFlags of MI.
205static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
206 const MachineInstr &MI,
207 const MachineOperand &MO) {
208 unsigned MILog2SEW =
209 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
210
211 if (MO.getOperandNo() == 0)
212 return MILog2SEW;
213
214 unsigned MISEW = 1 << MILog2SEW;
215 unsigned EEW = MISEW / Factor;
216 unsigned Log2EEW = Log2_32(EEW);
217
218 return Log2EEW;
219}
220
// Expands to the run of case labels for all NF variants (SEG2..SEG8) of a
// segment load/store pseudo at a given element width. Note the first
// expansion deliberately omits the leading `case` keyword so the macro can be
// used directly after a `case` written at the use site (e.g.
// `case VSSEG_CASES(8):`).
221#define VSEG_CASES(Prefix, EEW) \
222 RISCV::Prefix##SEG2E##EEW##_V: \
223 case RISCV::Prefix##SEG3E##EEW##_V: \
224 case RISCV::Prefix##SEG4E##EEW##_V: \
225 case RISCV::Prefix##SEG5E##EEW##_V: \
226 case RISCV::Prefix##SEG6E##EEW##_V: \
227 case RISCV::Prefix##SEG7E##EEW##_V: \
228 case RISCV::Prefix##SEG8E##EEW##_V
// Convenience wrappers: unit-stride (VS*) and strided (VSS*) segment stores,
// plus unordered/ordered indexed (VSUX*/VSOX*) stores, which carry an
// I<EEW> index-type infix in their opcode names.
229#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
230#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
231#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
232#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
233
234static std::optional<unsigned> getOperandLog2EEW(const MachineOperand &MO) {
235 const MachineInstr &MI = *MO.getParent();
236 const MCInstrDesc &Desc = MI.getDesc();
238 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
239 assert(RVV && "Could not find MI in PseudoTable");
240
241 // MI has a SEW associated with it. The RVV specification defines
242 // the EEW of each operand and definition in relation to MI.SEW.
243 unsigned MILog2SEW = MI.getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
244
245 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
246 const bool IsTied = RISCVII::isTiedPseudo(Desc.TSFlags);
247
248 bool IsMODef = MO.getOperandNo() == 0 ||
249 (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs());
250
251 // All mask operands have EEW=1
252 const MCOperandInfo &Info = Desc.operands()[MO.getOperandNo()];
253 if (Info.OperandType == MCOI::OPERAND_REGISTER &&
254 Info.RegClass == RISCV::VMV0RegClassID)
255 return 0;
256
257 // switch against BaseInstr to reduce number of cases that need to be
258 // considered.
259 switch (RVV->BaseInstr) {
260
261 // 6. Configuration-Setting Instructions
262 // Configuration setting instructions do not read or write vector registers
263 case RISCV::VSETIVLI:
264 case RISCV::VSETVL:
265 case RISCV::VSETVLI:
266 llvm_unreachable("Configuration setting instructions do not read or write "
267 "vector registers");
268
269 // Vector Loads and Stores
270 // Vector Unit-Stride Instructions
271 // Vector Strided Instructions
272 /// Dest EEW encoded in the instruction
273 case RISCV::VLM_V:
274 case RISCV::VSM_V:
275 return 0;
276 case RISCV::VLE8_V:
277 case RISCV::VSE8_V:
278 case RISCV::VLSE8_V:
279 case RISCV::VSSE8_V:
280 case VSSEG_CASES(8):
281 case VSSSEG_CASES(8):
282 return 3;
283 case RISCV::VLE16_V:
284 case RISCV::VSE16_V:
285 case RISCV::VLSE16_V:
286 case RISCV::VSSE16_V:
287 case VSSEG_CASES(16):
288 case VSSSEG_CASES(16):
289 return 4;
290 case RISCV::VLE32_V:
291 case RISCV::VSE32_V:
292 case RISCV::VLSE32_V:
293 case RISCV::VSSE32_V:
294 case VSSEG_CASES(32):
295 case VSSSEG_CASES(32):
296 return 5;
297 case RISCV::VLE64_V:
298 case RISCV::VSE64_V:
299 case RISCV::VLSE64_V:
300 case RISCV::VSSE64_V:
301 case VSSEG_CASES(64):
302 case VSSSEG_CASES(64):
303 return 6;
304
305 // Vector Indexed Instructions
306 // vs(o|u)xei<eew>.v
307 // Dest/Data (operand 0) EEW=SEW. Source EEW=<eew>.
308 case RISCV::VLUXEI8_V:
309 case RISCV::VLOXEI8_V:
310 case RISCV::VSUXEI8_V:
311 case RISCV::VSOXEI8_V:
312 case VSUXSEG_CASES(8):
313 case VSOXSEG_CASES(8): {
314 if (MO.getOperandNo() == 0)
315 return MILog2SEW;
316 return 3;
317 }
318 case RISCV::VLUXEI16_V:
319 case RISCV::VLOXEI16_V:
320 case RISCV::VSUXEI16_V:
321 case RISCV::VSOXEI16_V:
322 case VSUXSEG_CASES(16):
323 case VSOXSEG_CASES(16): {
324 if (MO.getOperandNo() == 0)
325 return MILog2SEW;
326 return 4;
327 }
328 case RISCV::VLUXEI32_V:
329 case RISCV::VLOXEI32_V:
330 case RISCV::VSUXEI32_V:
331 case RISCV::VSOXEI32_V:
332 case VSUXSEG_CASES(32):
333 case VSOXSEG_CASES(32): {
334 if (MO.getOperandNo() == 0)
335 return MILog2SEW;
336 return 5;
337 }
338 case RISCV::VLUXEI64_V:
339 case RISCV::VLOXEI64_V:
340 case RISCV::VSUXEI64_V:
341 case RISCV::VSOXEI64_V:
342 case VSUXSEG_CASES(64):
343 case VSOXSEG_CASES(64): {
344 if (MO.getOperandNo() == 0)
345 return MILog2SEW;
346 return 6;
347 }
348
349 // Vector Integer Arithmetic Instructions
350 // Vector Single-Width Integer Add and Subtract
351 case RISCV::VADD_VI:
352 case RISCV::VADD_VV:
353 case RISCV::VADD_VX:
354 case RISCV::VSUB_VV:
355 case RISCV::VSUB_VX:
356 case RISCV::VRSUB_VI:
357 case RISCV::VRSUB_VX:
358 // Vector Bitwise Logical Instructions
359 // Vector Single-Width Shift Instructions
360 // EEW=SEW.
361 case RISCV::VAND_VI:
362 case RISCV::VAND_VV:
363 case RISCV::VAND_VX:
364 case RISCV::VOR_VI:
365 case RISCV::VOR_VV:
366 case RISCV::VOR_VX:
367 case RISCV::VXOR_VI:
368 case RISCV::VXOR_VV:
369 case RISCV::VXOR_VX:
370 case RISCV::VSLL_VI:
371 case RISCV::VSLL_VV:
372 case RISCV::VSLL_VX:
373 case RISCV::VSRL_VI:
374 case RISCV::VSRL_VV:
375 case RISCV::VSRL_VX:
376 case RISCV::VSRA_VI:
377 case RISCV::VSRA_VV:
378 case RISCV::VSRA_VX:
379 // Vector Integer Min/Max Instructions
380 // EEW=SEW.
381 case RISCV::VMINU_VV:
382 case RISCV::VMINU_VX:
383 case RISCV::VMIN_VV:
384 case RISCV::VMIN_VX:
385 case RISCV::VMAXU_VV:
386 case RISCV::VMAXU_VX:
387 case RISCV::VMAX_VV:
388 case RISCV::VMAX_VX:
389 // Vector Single-Width Integer Multiply Instructions
390 // Source and Dest EEW=SEW.
391 case RISCV::VMUL_VV:
392 case RISCV::VMUL_VX:
393 case RISCV::VMULH_VV:
394 case RISCV::VMULH_VX:
395 case RISCV::VMULHU_VV:
396 case RISCV::VMULHU_VX:
397 case RISCV::VMULHSU_VV:
398 case RISCV::VMULHSU_VX:
399 // Vector Integer Divide Instructions
400 // EEW=SEW.
401 case RISCV::VDIVU_VV:
402 case RISCV::VDIVU_VX:
403 case RISCV::VDIV_VV:
404 case RISCV::VDIV_VX:
405 case RISCV::VREMU_VV:
406 case RISCV::VREMU_VX:
407 case RISCV::VREM_VV:
408 case RISCV::VREM_VX:
409 // Vector Single-Width Integer Multiply-Add Instructions
410 // EEW=SEW.
411 case RISCV::VMACC_VV:
412 case RISCV::VMACC_VX:
413 case RISCV::VNMSAC_VV:
414 case RISCV::VNMSAC_VX:
415 case RISCV::VMADD_VV:
416 case RISCV::VMADD_VX:
417 case RISCV::VNMSUB_VV:
418 case RISCV::VNMSUB_VX:
419 // Vector Integer Merge Instructions
420 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
421 // EEW=SEW, except the mask operand has EEW=1. Mask operand is handled
422 // before this switch.
423 case RISCV::VMERGE_VIM:
424 case RISCV::VMERGE_VVM:
425 case RISCV::VMERGE_VXM:
426 case RISCV::VADC_VIM:
427 case RISCV::VADC_VVM:
428 case RISCV::VADC_VXM:
429 case RISCV::VSBC_VVM:
430 case RISCV::VSBC_VXM:
431 // Vector Integer Move Instructions
432 // Vector Fixed-Point Arithmetic Instructions
433 // Vector Single-Width Saturating Add and Subtract
434 // Vector Single-Width Averaging Add and Subtract
435 // EEW=SEW.
436 case RISCV::VMV_V_I:
437 case RISCV::VMV_V_V:
438 case RISCV::VMV_V_X:
439 case RISCV::VSADDU_VI:
440 case RISCV::VSADDU_VV:
441 case RISCV::VSADDU_VX:
442 case RISCV::VSADD_VI:
443 case RISCV::VSADD_VV:
444 case RISCV::VSADD_VX:
445 case RISCV::VSSUBU_VV:
446 case RISCV::VSSUBU_VX:
447 case RISCV::VSSUB_VV:
448 case RISCV::VSSUB_VX:
449 case RISCV::VAADDU_VV:
450 case RISCV::VAADDU_VX:
451 case RISCV::VAADD_VV:
452 case RISCV::VAADD_VX:
453 case RISCV::VASUBU_VV:
454 case RISCV::VASUBU_VX:
455 case RISCV::VASUB_VV:
456 case RISCV::VASUB_VX:
457 // Vector Single-Width Fractional Multiply with Rounding and Saturation
458 // EEW=SEW. The instruction produces 2*SEW product internally but
459 // saturates to fit into SEW bits.
460 case RISCV::VSMUL_VV:
461 case RISCV::VSMUL_VX:
462 // Vector Single-Width Scaling Shift Instructions
463 // EEW=SEW.
464 case RISCV::VSSRL_VI:
465 case RISCV::VSSRL_VV:
466 case RISCV::VSSRL_VX:
467 case RISCV::VSSRA_VI:
468 case RISCV::VSSRA_VV:
469 case RISCV::VSSRA_VX:
470 // Vector Permutation Instructions
471 // Integer Scalar Move Instructions
472 // Floating-Point Scalar Move Instructions
473 // EEW=SEW.
474 case RISCV::VMV_X_S:
475 case RISCV::VMV_S_X:
476 case RISCV::VFMV_F_S:
477 case RISCV::VFMV_S_F:
478 // Vector Slide Instructions
479 // EEW=SEW.
480 case RISCV::VSLIDEUP_VI:
481 case RISCV::VSLIDEUP_VX:
482 case RISCV::VSLIDEDOWN_VI:
483 case RISCV::VSLIDEDOWN_VX:
484 case RISCV::VSLIDE1UP_VX:
485 case RISCV::VFSLIDE1UP_VF:
486 case RISCV::VSLIDE1DOWN_VX:
487 case RISCV::VFSLIDE1DOWN_VF:
488 // Vector Register Gather Instructions
489 // EEW=SEW. For mask operand, EEW=1.
490 case RISCV::VRGATHER_VI:
491 case RISCV::VRGATHER_VV:
492 case RISCV::VRGATHER_VX:
493 // Vector Element Index Instruction
494 case RISCV::VID_V:
495 // Vector Single-Width Floating-Point Add/Subtract Instructions
496 case RISCV::VFADD_VF:
497 case RISCV::VFADD_VV:
498 case RISCV::VFSUB_VF:
499 case RISCV::VFSUB_VV:
500 case RISCV::VFRSUB_VF:
501 // Vector Single-Width Floating-Point Multiply/Divide Instructions
502 case RISCV::VFMUL_VF:
503 case RISCV::VFMUL_VV:
504 case RISCV::VFDIV_VF:
505 case RISCV::VFDIV_VV:
506 case RISCV::VFRDIV_VF:
507 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
508 case RISCV::VFMACC_VV:
509 case RISCV::VFMACC_VF:
510 case RISCV::VFNMACC_VV:
511 case RISCV::VFNMACC_VF:
512 case RISCV::VFMSAC_VV:
513 case RISCV::VFMSAC_VF:
514 case RISCV::VFNMSAC_VV:
515 case RISCV::VFNMSAC_VF:
516 case RISCV::VFMADD_VV:
517 case RISCV::VFMADD_VF:
518 case RISCV::VFNMADD_VV:
519 case RISCV::VFNMADD_VF:
520 case RISCV::VFMSUB_VV:
521 case RISCV::VFMSUB_VF:
522 case RISCV::VFNMSUB_VV:
523 case RISCV::VFNMSUB_VF:
524 // Vector Floating-Point Square-Root Instruction
525 case RISCV::VFSQRT_V:
526 // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
527 case RISCV::VFRSQRT7_V:
528 // Vector Floating-Point Reciprocal Estimate Instruction
529 case RISCV::VFREC7_V:
530 // Vector Floating-Point MIN/MAX Instructions
531 case RISCV::VFMIN_VF:
532 case RISCV::VFMIN_VV:
533 case RISCV::VFMAX_VF:
534 case RISCV::VFMAX_VV:
535 // Vector Floating-Point Sign-Injection Instructions
536 case RISCV::VFSGNJ_VF:
537 case RISCV::VFSGNJ_VV:
538 case RISCV::VFSGNJN_VV:
539 case RISCV::VFSGNJN_VF:
540 case RISCV::VFSGNJX_VF:
541 case RISCV::VFSGNJX_VV:
542 // Vector Floating-Point Classify Instruction
543 case RISCV::VFCLASS_V:
544 // Vector Floating-Point Move Instruction
545 case RISCV::VFMV_V_F:
546 // Single-Width Floating-Point/Integer Type-Convert Instructions
547 case RISCV::VFCVT_XU_F_V:
548 case RISCV::VFCVT_X_F_V:
549 case RISCV::VFCVT_RTZ_XU_F_V:
550 case RISCV::VFCVT_RTZ_X_F_V:
551 case RISCV::VFCVT_F_XU_V:
552 case RISCV::VFCVT_F_X_V:
553 // Vector Floating-Point Merge Instruction
554 case RISCV::VFMERGE_VFM:
555 // Vector count population in mask vcpop.m
556 // vfirst find-first-set mask bit
557 case RISCV::VCPOP_M:
558 case RISCV::VFIRST_M:
559 // Vector Bit-manipulation Instructions (Zvbb)
560 // Vector And-Not
561 case RISCV::VANDN_VV:
562 case RISCV::VANDN_VX:
563 // Vector Reverse Bits in Elements
564 case RISCV::VBREV_V:
565 // Vector Reverse Bits in Bytes
566 case RISCV::VBREV8_V:
567 // Vector Reverse Bytes
568 case RISCV::VREV8_V:
569 // Vector Count Leading Zeros
570 case RISCV::VCLZ_V:
571 // Vector Count Trailing Zeros
572 case RISCV::VCTZ_V:
573 // Vector Population Count
574 case RISCV::VCPOP_V:
575 // Vector Rotate Left
576 case RISCV::VROL_VV:
577 case RISCV::VROL_VX:
578 // Vector Rotate Right
579 case RISCV::VROR_VI:
580 case RISCV::VROR_VV:
581 case RISCV::VROR_VX:
582 // Vector Carry-less Multiplication Instructions (Zvbc)
583 // Vector Carry-less Multiply
584 case RISCV::VCLMUL_VV:
585 case RISCV::VCLMUL_VX:
586 // Vector Carry-less Multiply Return High Half
587 case RISCV::VCLMULH_VV:
588 case RISCV::VCLMULH_VX:
589 return MILog2SEW;
590
591 // Vector Widening Shift Left Logical (Zvbb)
592 case RISCV::VWSLL_VI:
593 case RISCV::VWSLL_VX:
594 case RISCV::VWSLL_VV:
595 // Vector Widening Integer Add/Subtract
596 // Def uses EEW=2*SEW . Operands use EEW=SEW.
597 case RISCV::VWADDU_VV:
598 case RISCV::VWADDU_VX:
599 case RISCV::VWSUBU_VV:
600 case RISCV::VWSUBU_VX:
601 case RISCV::VWADD_VV:
602 case RISCV::VWADD_VX:
603 case RISCV::VWSUB_VV:
604 case RISCV::VWSUB_VX:
605 // Vector Widening Integer Multiply Instructions
606 // Destination EEW=2*SEW. Source EEW=SEW.
607 case RISCV::VWMUL_VV:
608 case RISCV::VWMUL_VX:
609 case RISCV::VWMULSU_VV:
610 case RISCV::VWMULSU_VX:
611 case RISCV::VWMULU_VV:
612 case RISCV::VWMULU_VX:
613 // Vector Widening Integer Multiply-Add Instructions
614 // Destination EEW=2*SEW. Source EEW=SEW.
615 // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
616 // is then added to the 2*SEW-bit Dest. These instructions never have a
617 // passthru operand.
618 case RISCV::VWMACCU_VV:
619 case RISCV::VWMACCU_VX:
620 case RISCV::VWMACC_VV:
621 case RISCV::VWMACC_VX:
622 case RISCV::VWMACCSU_VV:
623 case RISCV::VWMACCSU_VX:
624 case RISCV::VWMACCUS_VX:
625 // Vector Widening Floating-Point Fused Multiply-Add Instructions
626 case RISCV::VFWMACC_VF:
627 case RISCV::VFWMACC_VV:
628 case RISCV::VFWNMACC_VF:
629 case RISCV::VFWNMACC_VV:
630 case RISCV::VFWMSAC_VF:
631 case RISCV::VFWMSAC_VV:
632 case RISCV::VFWNMSAC_VF:
633 case RISCV::VFWNMSAC_VV:
634 case RISCV::VFWMACCBF16_VV:
635 case RISCV::VFWMACCBF16_VF:
636 // Vector Widening Floating-Point Add/Subtract Instructions
637 // Dest EEW=2*SEW. Source EEW=SEW.
638 case RISCV::VFWADD_VV:
639 case RISCV::VFWADD_VF:
640 case RISCV::VFWSUB_VV:
641 case RISCV::VFWSUB_VF:
642 // Vector Widening Floating-Point Multiply
643 case RISCV::VFWMUL_VF:
644 case RISCV::VFWMUL_VV:
645 // Widening Floating-Point/Integer Type-Convert Instructions
646 case RISCV::VFWCVT_XU_F_V:
647 case RISCV::VFWCVT_X_F_V:
648 case RISCV::VFWCVT_RTZ_XU_F_V:
649 case RISCV::VFWCVT_RTZ_X_F_V:
650 case RISCV::VFWCVT_F_XU_V:
651 case RISCV::VFWCVT_F_X_V:
652 case RISCV::VFWCVT_F_F_V:
653 case RISCV::VFWCVTBF16_F_F_V:
654 return IsMODef ? MILog2SEW + 1 : MILog2SEW;
655
656 // Def and Op1 uses EEW=2*SEW. Op2 uses EEW=SEW.
657 case RISCV::VWADDU_WV:
658 case RISCV::VWADDU_WX:
659 case RISCV::VWSUBU_WV:
660 case RISCV::VWSUBU_WX:
661 case RISCV::VWADD_WV:
662 case RISCV::VWADD_WX:
663 case RISCV::VWSUB_WV:
664 case RISCV::VWSUB_WX:
665 // Vector Widening Floating-Point Add/Subtract Instructions
666 case RISCV::VFWADD_WF:
667 case RISCV::VFWADD_WV:
668 case RISCV::VFWSUB_WF:
669 case RISCV::VFWSUB_WV: {
670 bool IsOp1 = (HasPassthru && !IsTied) ? MO.getOperandNo() == 2
671 : MO.getOperandNo() == 1;
672 bool TwoTimes = IsMODef || IsOp1;
673 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
674 }
675
676 // Vector Integer Extension
677 case RISCV::VZEXT_VF2:
678 case RISCV::VSEXT_VF2:
679 return getIntegerExtensionOperandEEW(2, MI, MO);
680 case RISCV::VZEXT_VF4:
681 case RISCV::VSEXT_VF4:
682 return getIntegerExtensionOperandEEW(4, MI, MO);
683 case RISCV::VZEXT_VF8:
684 case RISCV::VSEXT_VF8:
685 return getIntegerExtensionOperandEEW(8, MI, MO);
686
687 // Vector Narrowing Integer Right Shift Instructions
688 // Destination EEW=SEW, Op 1 has EEW=2*SEW. Op2 has EEW=SEW
689 case RISCV::VNSRL_WX:
690 case RISCV::VNSRL_WI:
691 case RISCV::VNSRL_WV:
692 case RISCV::VNSRA_WI:
693 case RISCV::VNSRA_WV:
694 case RISCV::VNSRA_WX:
695 // Vector Narrowing Fixed-Point Clip Instructions
696 // Destination and Op1 EEW=SEW. Op2 EEW=2*SEW.
697 case RISCV::VNCLIPU_WI:
698 case RISCV::VNCLIPU_WV:
699 case RISCV::VNCLIPU_WX:
700 case RISCV::VNCLIP_WI:
701 case RISCV::VNCLIP_WV:
702 case RISCV::VNCLIP_WX:
703 // Narrowing Floating-Point/Integer Type-Convert Instructions
704 case RISCV::VFNCVT_XU_F_W:
705 case RISCV::VFNCVT_X_F_W:
706 case RISCV::VFNCVT_RTZ_XU_F_W:
707 case RISCV::VFNCVT_RTZ_X_F_W:
708 case RISCV::VFNCVT_F_XU_W:
709 case RISCV::VFNCVT_F_X_W:
710 case RISCV::VFNCVT_F_F_W:
711 case RISCV::VFNCVT_ROD_F_F_W:
712 case RISCV::VFNCVTBF16_F_F_W: {
713 assert(!IsTied);
714 bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
715 bool TwoTimes = IsOp1;
716 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
717 }
718
719 // Vector Mask Instructions
720 // Vector Mask-Register Logical Instructions
721 // vmsbf.m set-before-first mask bit
722 // vmsif.m set-including-first mask bit
723 // vmsof.m set-only-first mask bit
724 // EEW=1
725 // We handle the cases when operand is a v0 mask operand above the switch,
726 // but these instructions may use non-v0 mask operands and need to be handled
727 // specifically.
728 case RISCV::VMAND_MM:
729 case RISCV::VMNAND_MM:
730 case RISCV::VMANDN_MM:
731 case RISCV::VMXOR_MM:
732 case RISCV::VMOR_MM:
733 case RISCV::VMNOR_MM:
734 case RISCV::VMORN_MM:
735 case RISCV::VMXNOR_MM:
736 case RISCV::VMSBF_M:
737 case RISCV::VMSIF_M:
738 case RISCV::VMSOF_M: {
739 return MILog2SEW;
740 }
741
742 // Vector Compress Instruction
743 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
744 // before this switch.
745 case RISCV::VCOMPRESS_VM:
746 return MO.getOperandNo() == 3 ? 0 : MILog2SEW;
747
748 // Vector Iota Instruction
749 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
750 // before this switch.
751 case RISCV::VIOTA_M: {
752 if (IsMODef || MO.getOperandNo() == 1)
753 return MILog2SEW;
754 return 0;
755 }
756
757 // Vector Integer Compare Instructions
758 // Dest EEW=1. Source EEW=SEW.
759 case RISCV::VMSEQ_VI:
760 case RISCV::VMSEQ_VV:
761 case RISCV::VMSEQ_VX:
762 case RISCV::VMSNE_VI:
763 case RISCV::VMSNE_VV:
764 case RISCV::VMSNE_VX:
765 case RISCV::VMSLTU_VV:
766 case RISCV::VMSLTU_VX:
767 case RISCV::VMSLT_VV:
768 case RISCV::VMSLT_VX:
769 case RISCV::VMSLEU_VV:
770 case RISCV::VMSLEU_VI:
771 case RISCV::VMSLEU_VX:
772 case RISCV::VMSLE_VV:
773 case RISCV::VMSLE_VI:
774 case RISCV::VMSLE_VX:
775 case RISCV::VMSGTU_VI:
776 case RISCV::VMSGTU_VX:
777 case RISCV::VMSGT_VI:
778 case RISCV::VMSGT_VX:
779 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
780 // Dest EEW=1. Source EEW=SEW. Mask source operand handled above this switch.
781 case RISCV::VMADC_VIM:
782 case RISCV::VMADC_VVM:
783 case RISCV::VMADC_VXM:
784 case RISCV::VMSBC_VVM:
785 case RISCV::VMSBC_VXM:
786 // Dest EEW=1. Source EEW=SEW.
787 case RISCV::VMADC_VV:
788 case RISCV::VMADC_VI:
789 case RISCV::VMADC_VX:
790 case RISCV::VMSBC_VV:
791 case RISCV::VMSBC_VX:
792 // 13.13. Vector Floating-Point Compare Instructions
793 // Dest EEW=1. Source EEW=SEW
794 case RISCV::VMFEQ_VF:
795 case RISCV::VMFEQ_VV:
796 case RISCV::VMFNE_VF:
797 case RISCV::VMFNE_VV:
798 case RISCV::VMFLT_VF:
799 case RISCV::VMFLT_VV:
800 case RISCV::VMFLE_VF:
801 case RISCV::VMFLE_VV:
802 case RISCV::VMFGT_VF:
803 case RISCV::VMFGE_VF: {
804 if (IsMODef)
805 return 0;
806 return MILog2SEW;
807 }
808
809 // Vector Reduction Operations
810 // Vector Single-Width Integer Reduction Instructions
811 case RISCV::VREDAND_VS:
812 case RISCV::VREDMAX_VS:
813 case RISCV::VREDMAXU_VS:
814 case RISCV::VREDMIN_VS:
815 case RISCV::VREDMINU_VS:
816 case RISCV::VREDOR_VS:
817 case RISCV::VREDSUM_VS:
818 case RISCV::VREDXOR_VS:
819 // Vector Single-Width Floating-Point Reduction Instructions
820 case RISCV::VFREDMAX_VS:
821 case RISCV::VFREDMIN_VS:
822 case RISCV::VFREDOSUM_VS:
823 case RISCV::VFREDUSUM_VS: {
824 return MILog2SEW;
825 }
826
827 // Vector Widening Integer Reduction Instructions
828 // The Dest and VS1 read only element 0 for the vector register. Return
829 // 2*EEW for these. VS2 has EEW=SEW and EMUL=LMUL.
830 case RISCV::VWREDSUM_VS:
831 case RISCV::VWREDSUMU_VS:
832 // Vector Widening Floating-Point Reduction Instructions
833 case RISCV::VFWREDOSUM_VS:
834 case RISCV::VFWREDUSUM_VS: {
835 bool TwoTimes = IsMODef || MO.getOperandNo() == 3;
836 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
837 }
838
839 // Vector Register Gather with 16-bit Index Elements Instruction
840 // Dest and source data EEW=SEW. Index vector EEW=16.
841 case RISCV::VRGATHEREI16_VV: {
842 if (MO.getOperandNo() == 2)
843 return 4;
844 return MILog2SEW;
845 }
846
847 default:
848 return std::nullopt;
849 }
850}
851
852static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) {
853 const MachineInstr &MI = *MO.getParent();
855 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
856 assert(RVV && "Could not find MI in PseudoTable");
857
858 std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO);
859 if (!Log2EEW)
860 return std::nullopt;
861
862 switch (RVV->BaseInstr) {
863 // Vector Reduction Operations
864 // Vector Single-Width Integer Reduction Instructions
865 // Vector Widening Integer Reduction Instructions
866 // Vector Widening Floating-Point Reduction Instructions
867 // The Dest and VS1 only read element 0 of the vector register. Return just
868 // the EEW for these.
869 case RISCV::VREDAND_VS:
870 case RISCV::VREDMAX_VS:
871 case RISCV::VREDMAXU_VS:
872 case RISCV::VREDMIN_VS:
873 case RISCV::VREDMINU_VS:
874 case RISCV::VREDOR_VS:
875 case RISCV::VREDSUM_VS:
876 case RISCV::VREDXOR_VS:
877 case RISCV::VWREDSUM_VS:
878 case RISCV::VWREDSUMU_VS:
879 case RISCV::VFWREDOSUM_VS:
880 case RISCV::VFWREDUSUM_VS:
881 if (MO.getOperandNo() != 2)
882 return OperandInfo(*Log2EEW);
883 break;
884 };
885
886 // All others have EMUL=EEW/SEW*LMUL
887 return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI), *Log2EEW);
888}
889
890static bool isTupleInsertInstr(const MachineInstr &MI);
891
892/// Return true if we can reason about demanded VLs elementwise for \p MI.
893bool RISCVVLOptimizer::isSupportedInstr(const MachineInstr &MI) const {
894 if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
895 return true;
896
897 unsigned RVVOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
898 if (!RVVOpc)
899 return false;
900
901 assert(!(MI.getNumExplicitDefs() == 0 && !MI.mayStore() &&
902 !RISCVII::elementsDependOnVL(TII->get(RVVOpc).TSFlags)) &&
903 "No defs but elements don't depend on VL?");
904
905 // TODO: Reduce vl for vmv.s.x and vfmv.s.f. Currently this introduces more vl
906 // toggles, we need to extend PRE in RISCVInsertVSETVLI first.
907 if (RVVOpc == RISCV::VMV_S_X || RVVOpc == RISCV::VFMV_S_F)
908 return false;
909
910 if (RISCVII::elementsDependOnVL(TII->get(RVVOpc).TSFlags))
911 return false;
912
913 if (MI.mayStore())
914 return false;
915
916 return true;
917}
918
919/// Return true if MO is a vector operand but is used as a scalar operand.
921 const MachineInstr *MI = MO.getParent();
923 RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
924
925 if (!RVV)
926 return false;
927
928 switch (RVV->BaseInstr) {
929 // Reductions only use vs1[0] of vs1
930 case RISCV::VREDAND_VS:
931 case RISCV::VREDMAX_VS:
932 case RISCV::VREDMAXU_VS:
933 case RISCV::VREDMIN_VS:
934 case RISCV::VREDMINU_VS:
935 case RISCV::VREDOR_VS:
936 case RISCV::VREDSUM_VS:
937 case RISCV::VREDXOR_VS:
938 case RISCV::VWREDSUM_VS:
939 case RISCV::VWREDSUMU_VS:
940 case RISCV::VFREDMAX_VS:
941 case RISCV::VFREDMIN_VS:
942 case RISCV::VFREDOSUM_VS:
943 case RISCV::VFREDUSUM_VS:
944 case RISCV::VFWREDOSUM_VS:
945 case RISCV::VFWREDUSUM_VS:
946 return MO.getOperandNo() == 3;
947 case RISCV::VMV_X_S:
948 case RISCV::VFMV_F_S:
949 return MO.getOperandNo() == 1;
950 default:
951 return false;
952 }
953}
954
955bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
956 const MCInstrDesc &Desc = MI.getDesc();
957 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
958 return false;
959
960 if (MI.getNumExplicitDefs() != 1)
961 return false;
962
963 // Some instructions have implicit defs e.g. $vxsat. If they might be read
964 // later then we can't reduce VL.
965 if (!MI.allImplicitDefsAreDead()) {
966 LLVM_DEBUG(dbgs() << "Not a candidate because has non-dead implicit def\n");
967 return false;
968 }
969
970 if (MI.mayRaiseFPException()) {
971 LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n");
972 return false;
973 }
974
975 for (const MachineMemOperand *MMO : MI.memoperands()) {
976 if (MMO->isVolatile()) {
977 LLVM_DEBUG(dbgs() << "Not a candidate because contains volatile MMO\n");
978 return false;
979 }
980 }
981
982 if (!isSupportedInstr(MI)) {
983 LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction: "
984 << MI);
985 return false;
986 }
987
989 TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) &&
990 "Instruction shouldn't be supported if elements depend on VL");
991
993 MRI->getRegClass(MI.getOperand(0).getReg())->TSFlags) &&
994 "All supported instructions produce a vector register result");
995
996 LLVM_DEBUG(dbgs() << "Found a candidate for VL reduction: " << MI << "\n");
997 return true;
998}
999
1000/// Given a vslidedown.vx like:
1001///
1002/// %slideamt = ADDI %x, -1
1003/// %v = PseudoVSLIDEDOWN_VX %passthru, %src, %slideamt, avl=1
1004///
1005/// %v will only read the first %slideamt + 1 lanes of %src, which = %x.
1006/// This is a common case when lowering extractelement.
1007///
1008/// Note that if %x is 0, %slideamt will be all ones. In this case %src will be
1009/// completely slid down and none of its lanes will be read (since %slideamt is
1010/// greater than the largest VLMAX of 65536) so we can demand any minimum VL.
1011static std::optional<DemandedVL>
1013 const MachineRegisterInfo *MRI) {
1014 const MachineInstr &MI = *UserOp.getParent();
1015 if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VSLIDEDOWN_VX)
1016 return std::nullopt;
1017 // We're looking at what lanes are used from the src operand.
1018 if (UserOp.getOperandNo() != 2)
1019 return std::nullopt;
1020 // For now, the AVL must be 1.
1021 const MachineOperand &AVL = MI.getOperand(4);
1022 if (!AVL.isImm() || AVL.getImm() != 1)
1023 return std::nullopt;
1024 // The slide amount must be %x - 1.
1025 const MachineOperand &SlideAmt = MI.getOperand(3);
1026 if (!SlideAmt.getReg().isVirtual())
1027 return std::nullopt;
1028 MachineInstr *SlideAmtDef = MRI->getUniqueVRegDef(SlideAmt.getReg());
1029 if (SlideAmtDef->getOpcode() != RISCV::ADDI ||
1030 SlideAmtDef->getOperand(2).getImm() != -AVL.getImm() ||
1031 !SlideAmtDef->getOperand(1).getReg().isVirtual())
1032 return std::nullopt;
1033 return SlideAmtDef->getOperand(1);
1034}
1035
1036DemandedVL
1037RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
1038 const MachineInstr &UserMI = *UserOp.getParent();
1039 const MCInstrDesc &Desc = UserMI.getDesc();
1040
1041 if (UserMI.isPHI() || UserMI.isFullCopy() || isTupleInsertInstr(UserMI))
1042 return DemandedVLs.lookup(&UserMI);
1043
1044 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
1045 LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
1046 " use VLMAX\n");
1047 return DemandedVL::vlmax();
1048 }
1049
1050 if (auto VL = getMinimumVLForVSLIDEDOWN_VX(UserOp, MRI))
1051 return *VL;
1052
1054 TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
1055 LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
1056 return DemandedVL::vlmax();
1057 }
1058
1059 unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1060 const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1061 // Looking for an immediate or a register VL that isn't X0.
1062 assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1063 "Did not expect X0 VL");
1064
1065 // If the user is a passthru it will read the elements past VL, so
1066 // abort if any of the elements past VL are demanded.
1067 if (UserOp.isTied()) {
1068 assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() &&
1070 if (!RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) {
1071 LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
1072 "instruction with demanded tail\n");
1073 return DemandedVL::vlmax();
1074 }
1075 }
1076
1077 // Instructions like reductions may use a vector register as a scalar
1078 // register. In this case, we should treat it as only reading the first lane.
1079 if (isVectorOpUsedAsScalarOp(UserOp)) {
1080 LLVM_DEBUG(dbgs() << " Used this operand as a scalar operand\n");
1081 return MachineOperand::CreateImm(1);
1082 }
1083
1084 // If we know the demanded VL of UserMI, then we can reduce the VL it
1085 // requires.
1086 if (RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp))
1087 return DemandedVLs.lookup(&UserMI);
1088
1089 return VLOp;
1090}
1091
1092/// Return true if MI is an instruction used for assembling registers
1093/// for segmented store instructions, namely, RISCVISD::TUPLE_INSERT.
1094/// Currently it's lowered to INSERT_SUBREG.
1096 if (!MI.isInsertSubreg())
1097 return false;
1098
1099 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1100 const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
1101 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1102 if (!RISCVRI::isVRegClass(DstRC->TSFlags))
1103 return false;
1104 unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
1105 if (NF < 2)
1106 return false;
1107
1108 // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
1109 auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
1110 unsigned SubRegIdx = MI.getOperand(3).getImm();
1111 [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
1112 assert(!IsFractional && "unexpected LMUL for tuple register classes");
1113 return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
1114}
1115
1117 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
1118 case VSSEG_CASES(8):
1119 case VSSSEG_CASES(8):
1120 case VSUXSEG_CASES(8):
1121 case VSOXSEG_CASES(8):
1122 case VSSEG_CASES(16):
1123 case VSSSEG_CASES(16):
1124 case VSUXSEG_CASES(16):
1125 case VSOXSEG_CASES(16):
1126 case VSSEG_CASES(32):
1127 case VSSSEG_CASES(32):
1128 case VSUXSEG_CASES(32):
1129 case VSOXSEG_CASES(32):
1130 case VSSEG_CASES(64):
1131 case VSSSEG_CASES(64):
1132 case VSUXSEG_CASES(64):
1133 case VSOXSEG_CASES(64):
1134 return true;
1135 default:
1136 return false;
1137 }
1138}
1139
1140bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
1141 if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
1142 return true;
1143
1144 SmallSetVector<MachineOperand *, 8> OpWorklist;
1145 SmallPtrSet<const MachineInstr *, 4> PHISeen;
1146 for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
1147 OpWorklist.insert(&UserOp);
1148
1149 while (!OpWorklist.empty()) {
1150 MachineOperand &UserOp = *OpWorklist.pop_back_val();
1151 const MachineInstr &UserMI = *UserOp.getParent();
1152 LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
1153
1154 if (UserMI.isFullCopy() && UserMI.getOperand(0).getReg().isVirtual()) {
1155 LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n");
1157 MRI->use_operands(UserMI.getOperand(0).getReg())));
1158 continue;
1159 }
1160
1161 if (isTupleInsertInstr(UserMI)) {
1162 LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
1163 for (MachineOperand &UseOp :
1164 MRI->use_operands(UserMI.getOperand(0).getReg())) {
1165 const MachineInstr &CandidateMI = *UseOp.getParent();
1166 // We should not propagate the VL if the user is not a segmented store
1167 // or another INSERT_SUBREG, since VL just works differently
1168 // between segmented operations (per-field) v.s. other RVV ops (on the
1169 // whole register group).
1170 if (!isTupleInsertInstr(CandidateMI) &&
1171 !isSegmentedStoreInstr(CandidateMI))
1172 return false;
1173 OpWorklist.insert(&UseOp);
1174 }
1175 continue;
1176 }
1177
1178 if (UserMI.isPHI()) {
1179 // Don't follow PHI cycles
1180 if (!PHISeen.insert(&UserMI).second)
1181 continue;
1182 LLVM_DEBUG(dbgs() << " Peeking through uses of PHI\n");
1184 MRI->use_operands(UserMI.getOperand(0).getReg())));
1185 continue;
1186 }
1187
1188 if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
1189 LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
1190 return false;
1191 }
1192
1193 std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp);
1194 std::optional<OperandInfo> ProducerInfo = getOperandInfo(MI.getOperand(0));
1195 if (!ConsumerInfo || !ProducerInfo) {
1196 LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
1197 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1198 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1199 return false;
1200 }
1201
1202 if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) {
1203 LLVM_DEBUG(
1204 dbgs()
1205 << " Abort due to incompatible information for EMUL or EEW.\n");
1206 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1207 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1208 return false;
1209 }
1210 }
1211
1212 return true;
1213}
1214
1215bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI,
1216 MachineOperand CommonVL) const {
1217 LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
1218
1219 unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
1220 MachineOperand &VLOp = MI.getOperand(VLOpNum);
1221
1222 // If the VL is 1, then there is no need to reduce it. This is an
1223 // optimization, not needed to preserve correctness.
1224 if (VLOp.isImm() && VLOp.getImm() == 1) {
1225 LLVM_DEBUG(dbgs() << " Abort due to VL == 1, no point in reducing.\n");
1226 return false;
1227 }
1228
1229 assert((CommonVL.isImm() || CommonVL.getReg().isVirtual()) &&
1230 "Expected VL to be an Imm or virtual Reg");
1231
1232 // If the VL is defined by a vleff that doesn't dominate MI, try using the
1233 // vleff's AVL. It will be greater than or equal to the output VL.
1234 if (CommonVL.isReg()) {
1235 const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
1236 if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
1237 !MDT->dominates(VLMI, &MI))
1238 CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
1239 }
1240
1241 if (!RISCV::isVLKnownLE(CommonVL, VLOp)) {
1242 LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
1243 return false;
1244 }
1245
1246 if (CommonVL.isIdenticalTo(VLOp)) {
1247 LLVM_DEBUG(
1248 dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
1249 return false;
1250 }
1251
1252 if (CommonVL.isImm()) {
1253 LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1254 << CommonVL.getImm() << " for " << MI << "\n");
1255 VLOp.ChangeToImmediate(CommonVL.getImm());
1256 return true;
1257 }
1258 const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
1259 if (!MDT->dominates(VLMI, &MI)) {
1260 LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
1261 return false;
1262 }
1263 LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1264 << printReg(CommonVL.getReg(), MRI->getTargetRegisterInfo())
1265 << " for " << MI << "\n");
1266
1267 // All our checks passed. We can reduce VL.
1268 VLOp.ChangeToRegister(CommonVL.getReg(), false);
1269 MRI->constrainRegClass(CommonVL.getReg(), &RISCV::GPRNoX0RegClass);
1270 return true;
1271}
1272
1273static bool isPhysical(const MachineOperand &MO) {
1274 return MO.isReg() && MO.getReg().isPhysical();
1275}
1276
1277/// Look through \p MI's operands and propagate what it demands to its uses.
1278void RISCVVLOptimizer::transfer(const MachineInstr &MI) {
1279 if (!isSupportedInstr(MI) || !checkUsers(MI) || any_of(MI.defs(), isPhysical))
1280 DemandedVLs[&MI] = DemandedVL::vlmax();
1281
1282 for (const MachineOperand &MO : virtual_vec_uses(MI)) {
1283 const MachineInstr *Def = MRI->getVRegDef(MO.getReg());
1284 DemandedVL Prev = DemandedVLs[Def];
1285 DemandedVLs[Def] = DemandedVLs[Def].max(getMinimumVLForUser(MO));
1286 if (DemandedVLs[Def] != Prev)
1287 Worklist.insert(Def);
1288 }
1289}
1290
1291bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
1292 if (skipFunction(MF.getFunction()))
1293 return false;
1294
1295 MRI = &MF.getRegInfo();
1296 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1297
1298 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
1299 if (!ST.hasVInstructions())
1300 return false;
1301
1302 TII = ST.getInstrInfo();
1303
1304 assert(DemandedVLs.empty());
1305
1306 // For each instruction that defines a vector, propagate the VL it
1307 // uses to its inputs.
1308 for (MachineBasicBlock *MBB : post_order(&MF)) {
1310 for (MachineInstr &MI : reverse(*MBB))
1311 if (!MI.isDebugInstr())
1312 Worklist.insert(&MI);
1313 }
1314
1315 while (!Worklist.empty()) {
1316 const MachineInstr *MI = Worklist.front();
1317 Worklist.remove(MI);
1318 transfer(*MI);
1319 }
1320
1321 // Then go through and see if we can reduce the VL of any instructions to
1322 // only what's demanded.
1323 bool MadeChange = false;
1324 for (auto &[MI, VL] : DemandedVLs) {
1325 assert(MDT->isReachableFromEntry(MI->getParent()));
1326 if (!isCandidate(*MI))
1327 continue;
1328 if (!tryReduceVL(*const_cast<MachineInstr *>(MI), VL.VL))
1329 continue;
1330 MadeChange = true;
1331 }
1332
1333 DemandedVLs.clear();
1334 return MadeChange;
1335}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static std::optional< DemandedVL > getMinimumVLForVSLIDEDOWN_VX(const MachineOperand &UserOp, const MachineRegisterInfo *MRI)
Given a vslidedown.vx like:
static unsigned getIntegerExtensionOperandEEW(unsigned Factor, const MachineInstr &MI, const MachineOperand &MO)
Dest has EEW=SEW.
static std::optional< OperandInfo > getOperandInfo(const MachineOperand &MO)
#define VSOXSEG_CASES(EEW)
static bool isSegmentedStoreInstr(const MachineInstr &MI)
static bool isVectorOpUsedAsScalarOp(const MachineOperand &MO)
Return true if MO is a vector operand but is used as a scalar operand.
static std::optional< unsigned > getOperandLog2EEW(const MachineOperand &MO)
static std::pair< unsigned, bool > getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI)
Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and SEW are from the TSFlags o...
#define VSUXSEG_CASES(EEW)
static bool isPhysical(const MachineOperand &MO)
#define VSSSEG_CASES(EEW)
#define VSSEG_CASES(EEW)
static bool isTupleInsertInstr(const MachineInstr &MI)
Return true if MI is an instruction used for assembling registers for segmented store instructions,...
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
#define PASS_NAME
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
bool isReachableFromEntry(const NodeT *A) const
isReachableFromEntry - Return true if A is dominated by the entry block of the function containing it...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Describe properties that are true of each instruction in the target description file.
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A vector that has set insertion semantics.
Definition SetVector.h:57
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
const uint8_t TSFlags
Configurable target specific flags.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
static bool readsPastVL(uint64_t TSFlags)
static bool isTiedPseudo(uint64_t TSFlags)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool hasVLOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool elementsDependOnVL(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
static unsigned getNF(uint8_t TSFlags)
static bool isVRegClass(uint8_t TSFlags)
static RISCVVType::VLMUL getLMul(uint8_t TSFlags)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS)
Given two VL operands, do we know that LHS <= RHS?
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2128
FunctionPass * createRISCVVLOptimizerPass()
iterator_range< po_iterator< T > > post_order(const T &G)
Op::Description Desc
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:552
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.